diff --git a/03_classification.ipynb b/03_classification.ipynb index ca683d8..f088b87 100644 --- a/03_classification.ipynb +++ b/03_classification.ipynb @@ -2342,7 +2342,7 @@ " for url in urls:\n", " text = text.replace(url, \" URL \")\n", " if self.replace_numbers:\n", - " text = re.sub(r'\\d+(?:\\.\\d*(?:[eE]\\d+))?', 'NUMBER', text)\n", + " text = re.sub(r'\\d+(?:\\.\\d*)?(?:[eE][+-]?\\d+)?', 'NUMBER', text)\n", " if self.remove_punctuation:\n", " text = re.sub(r'\\W+', ' ', text, flags=re.M)\n", " word_counts = Counter(text.split())\n",