Better regex to match numbers in 03_classification
The previous regex did not match a decimal number as a whole unless it used "E" notation (e.g. "3.14" was matched as two separate numbers, "3" and "14"). Also added an optional "+/-" sign in the "E" part.
main
parent
a102114c62
commit
d7afbd511d
|
@ -2342,7 +2342,7 @@
|
||||||
" for url in urls:\n",
|
" for url in urls:\n",
|
||||||
" text = text.replace(url, \" URL \")\n",
|
" text = text.replace(url, \" URL \")\n",
|
||||||
" if self.replace_numbers:\n",
|
" if self.replace_numbers:\n",
|
||||||
" text = re.sub(r'\\d+(?:\\.\\d*(?:[eE]\\d+))?', 'NUMBER', text)\n",
|
" text = re.sub(r'\\d+(?:\\.\\d*)?(?:[eE][+-]?\\d+)?', 'NUMBER', text)\n",
|
||||||
" if self.remove_punctuation:\n",
|
" if self.remove_punctuation:\n",
|
||||||
" text = re.sub(r'\\W+', ' ', text, flags=re.M)\n",
|
" text = re.sub(r'\\W+', ' ', text, flags=re.M)\n",
|
||||||
" word_counts = Counter(text.split())\n",
|
" word_counts = Counter(text.split())\n",
|
||||||
|
|
Loading…
Reference in New Issue