From 6ac9776eaa78d7d7ff7bb6c741c6dd353f2fd4a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Sat, 25 Jan 2020 16:37:00 +1300 Subject: [PATCH] Install urlextract if on Colab for exercise 4 and fix the word count --- 03_classification.ipynb | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/03_classification.ipynb b/03_classification.ipynb index 048db1e..d124fd2 100644 --- a/03_classification.ipynb +++ b/03_classification.ipynb @@ -2274,6 +2274,20 @@ "execution_count": 149, "metadata": {}, "outputs": [], + "source": [ + "# if running this notebook on Colab, we just pip install urlextract\n", + "try:\n", + " import google.colab\n", + " !pip install -q -U urlextract\n", + "except ImportError:\n", + " pass # not running on Colab" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [], "source": [ "try:\n", " import urlextract # may require an Internet connection to download root domain names\n", @@ -2294,7 +2308,7 @@ }, { "cell_type": "code", - "execution_count": 150, + "execution_count": 151, "metadata": {}, "outputs": [], "source": [ @@ -2346,7 +2360,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 152, "metadata": {}, "outputs": [], "source": [ @@ -2371,7 +2385,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 153, "metadata": {}, "outputs": [], "source": [ @@ -2403,7 +2417,7 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 154, "metadata": {}, "outputs": [], "source": [ @@ -2414,7 +2428,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 155, "metadata": {}, "outputs": [], "source": [ @@ -2425,12 +2439,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "What does this matrix mean? Well, the 64 in the third row, first column, means that the third email contains 64 words that are not part of the vocabulary. The 1 next to it means that the first word in the vocabulary is present once in this email. The 2 next to it means that the second word is present twice, and so on. You can look at the vocabulary to know which words we are talking about. The first word is \"of\", the second word is \"and\", etc." + "What does this matrix mean? Well, the 99 in the second row, first column, means that the second email contains 99 words that are not part of the vocabulary. The 11 next to it means that the first word in the vocabulary is present 11 times in this email. The 9 next to it means that the second word is present 9 times, and so on. You can look at the vocabulary to know which words we are talking about. The first word is \"the\", the second word is \"of\", etc." ] }, { "cell_type": "code", - "execution_count": 155, + "execution_count": 156, "metadata": {}, "outputs": [], "source": [ @@ -2446,7 +2460,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 157, "metadata": {}, "outputs": [], "source": [ @@ -2469,7 +2483,7 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 158, "metadata": {}, "outputs": [], "source": [ @@ -2492,7 +2506,7 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": 159, "metadata": {}, "outputs": [], "source": [