Merge branch 'master' into changes-chap10
commit
4e915ba6c2
|
@ -5,6 +5,7 @@
|
|||
*.pyc
|
||||
.DS_Store
|
||||
.ipynb_checkpoints
|
||||
.vscode/
|
||||
checkpoint
|
||||
logs/*
|
||||
tf_logs/*
|
||||
|
|
|
@ -124,7 +124,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# Download the data\n",
|
||||
"import urllib\n",
|
||||
"import urllib.request\n",
|
||||
"DOWNLOAD_ROOT = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
|
||||
"os.makedirs(datapath, exist_ok=True)\n",
|
||||
"for filename in (\"oecd_bli_2015.csv\", \"gdp_per_capita.csv\"):\n",
|
||||
|
@ -785,7 +785,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {},
|
||||
"toc": {
|
||||
|
|
|
@ -73,11 +73,7 @@
|
|||
" print(\"Saving figure\", fig_id)\n",
|
||||
" if tight_layout:\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.savefig(path, format=fig_extension, dpi=resolution)\n",
|
||||
"\n",
|
||||
"# Ignore useless warnings (see SciPy issue #5998)\n",
|
||||
"import warnings\n",
|
||||
"warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")"
|
||||
" plt.savefig(path, format=fig_extension, dpi=resolution)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -95,7 +91,7 @@
|
|||
"source": [
|
||||
"import os\n",
|
||||
"import tarfile\n",
|
||||
"import urllib\n",
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"DOWNLOAD_ROOT = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
|
||||
"HOUSING_PATH = os.path.join(\"datasets\", \"housing\")\n",
|
||||
|
@ -524,8 +520,7 @@
|
|||
"ax = housing.plot(kind=\"scatter\", x=\"longitude\", y=\"latitude\", figsize=(10,7),\n",
|
||||
" s=housing['population']/100, label=\"Population\",\n",
|
||||
" c=\"median_house_value\", cmap=plt.get_cmap(\"jet\"),\n",
|
||||
" colorbar=False, alpha=0.4,\n",
|
||||
" )\n",
|
||||
" colorbar=False, alpha=0.4)\n",
|
||||
"plt.imshow(california_img, extent=[-124.55, -113.80, 32.45, 42.05], alpha=0.5,\n",
|
||||
" cmap=plt.get_cmap(\"jet\"))\n",
|
||||
"plt.ylabel(\"Latitude\", fontsize=14)\n",
|
||||
|
@ -949,11 +944,36 @@
|
|||
"housing_extra_attribs = attr_adder.transform(housing.values)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note that I hard-coded the indices (3, 4, 5, 6) for concision and clarity in the book, but it would be much cleaner to get them dynamically, like this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 71,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"col_names = \"total_rooms\", \"total_bedrooms\", \"population\", \"households\"\n",
|
||||
"rooms_ix, bedrooms_ix, population_ix, households_ix = [\n",
|
||||
" housing.columns.get_loc(c) for c in col_names] # get the column indices"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Also, `housing_extra_attribs` is a NumPy array, so we've lost the column names (unfortunately, that's a problem with Scikit-Learn). To recover a `DataFrame`, you could run this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_extra_attribs = pd.DataFrame(\n",
|
||||
" housing_extra_attribs,\n",
|
||||
|
@ -971,7 +991,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -989,7 +1009,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"execution_count": 74,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -998,7 +1018,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"execution_count": 75,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1017,7 +1037,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1026,7 +1046,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"execution_count": 77,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1042,7 +1062,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"execution_count": 78,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1067,7 +1087,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"execution_count": 79,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1089,7 +1109,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 79,
|
||||
"execution_count": 80,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1103,7 +1123,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 80,
|
||||
"execution_count": 81,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1120,7 +1140,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1136,7 +1156,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"execution_count": 83,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1148,7 +1168,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"execution_count": 84,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1169,7 +1189,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"execution_count": 85,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1178,7 +1198,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 85,
|
||||
"execution_count": 86,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1187,7 +1207,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 86,
|
||||
"execution_count": 87,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1199,9 +1219,16 @@
|
|||
"lin_rmse"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Note**: since Scikit-Learn 0.22, you can get the RMSE directly by calling the `mean_squared_error()` function with `squared=False`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 87,
|
||||
"execution_count": 88,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1213,7 +1240,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 88,
|
||||
"execution_count": 89,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1225,7 +1252,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 89,
|
||||
"execution_count": 90,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1244,7 +1271,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 90,
|
||||
"execution_count": 91,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1257,7 +1284,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 91,
|
||||
"execution_count": 92,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1271,7 +1298,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 92,
|
||||
"execution_count": 93,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1290,7 +1317,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 93,
|
||||
"execution_count": 94,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1302,7 +1329,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 94,
|
||||
"execution_count": 95,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1314,7 +1341,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 95,
|
||||
"execution_count": 96,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1328,7 +1355,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 96,
|
||||
"execution_count": 97,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1338,7 +1365,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"execution_count": 98,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1354,7 +1381,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"execution_count": 99,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1384,7 +1411,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 99,
|
||||
"execution_count": 100,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1393,7 +1420,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 100,
|
||||
"execution_count": 101,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1409,7 +1436,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 101,
|
||||
"execution_count": 102,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1420,7 +1447,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 102,
|
||||
"execution_count": 103,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1429,7 +1456,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 103,
|
||||
"execution_count": 104,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1449,7 +1476,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"execution_count": 105,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1460,7 +1487,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"execution_count": 106,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1470,7 +1497,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"execution_count": 107,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1484,7 +1511,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 107,
|
||||
"execution_count": 108,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1502,7 +1529,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 108,
|
||||
"execution_count": 109,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1518,7 +1545,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 109,
|
||||
"execution_count": 110,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1540,7 +1567,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 110,
|
||||
"execution_count": 111,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1560,7 +1587,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 111,
|
||||
"execution_count": 112,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1585,7 +1612,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 112,
|
||||
"execution_count": 113,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1607,7 +1634,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 113,
|
||||
"execution_count": 114,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1616,7 +1643,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 114,
|
||||
"execution_count": 115,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1635,7 +1662,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 115,
|
||||
"execution_count": 116,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1669,9 +1696,16 @@
|
|||
"Question: Try a Support Vector Machine regressor (`sklearn.svm.SVR`), with various hyperparameters such as `kernel=\"linear\"` (with various values for the `C` hyperparameter) or `kernel=\"rbf\"` (with various values for the `C` and `gamma` hyperparameters). Don't worry about what these hyperparameters mean for now. How does the best `SVR` predictor perform?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: the following cell may take close to 30 minutes to run, or more depending on your hardware."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 116,
|
||||
"execution_count": 117,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1697,7 +1731,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 117,
|
||||
"execution_count": 118,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1715,7 +1749,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 118,
|
||||
"execution_count": 119,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1743,9 +1777,16 @@
|
|||
"Question: Try replacing `GridSearchCV` with `RandomizedSearchCV`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: the following cell may take close to 45 minutes to run, or more depending on your hardware."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 119,
|
||||
"execution_count": 120,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1778,7 +1819,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 120,
|
||||
"execution_count": 121,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1796,7 +1837,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 121,
|
||||
"execution_count": 122,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1819,7 +1860,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 122,
|
||||
"execution_count": 123,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1844,7 +1885,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 123,
|
||||
"execution_count": 124,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1883,7 +1924,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 124,
|
||||
"execution_count": 125,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1919,7 +1960,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 125,
|
||||
"execution_count": 126,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1935,7 +1976,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 126,
|
||||
"execution_count": 127,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1945,7 +1986,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 127,
|
||||
"execution_count": 128,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1961,7 +2002,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 128,
|
||||
"execution_count": 129,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1977,7 +2018,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 129,
|
||||
"execution_count": 130,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1989,7 +2030,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 130,
|
||||
"execution_count": 131,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -2005,7 +2046,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 131,
|
||||
"execution_count": 132,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -2021,7 +2062,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 132,
|
||||
"execution_count": 133,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -2051,7 +2092,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 133,
|
||||
"execution_count": 134,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -2064,7 +2105,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 134,
|
||||
"execution_count": 135,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -2080,7 +2121,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 135,
|
||||
"execution_count": 136,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -2112,9 +2153,16 @@
|
|||
"Question: Automatically explore some preparation options using `GridSearchCV`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: the following cell may take close to 45 minutes to run, or more depending on your hardware."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 136,
|
||||
"execution_count": 137,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -2130,7 +2178,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 137,
|
||||
"execution_count": 138,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -2168,7 +2216,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "279px",
|
||||
|
|
|
@ -84,6 +84,13 @@
|
|||
"# MNIST"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning:** since Scikit-Learn 0.24, `fetch_openml()` returns a Pandas `DataFrame` by default. To avoid this and keep the same code as in the book, we use `as_frame=False`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
|
@ -91,7 +98,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import fetch_openml\n",
|
||||
"mnist = fetch_openml('mnist_784', version=1)\n",
|
||||
"mnist = fetch_openml('mnist_784', version=1, as_frame=False)\n",
|
||||
"mnist.keys()"
|
||||
]
|
||||
},
|
||||
|
@ -291,7 +298,7 @@
|
|||
"from sklearn.model_selection import StratifiedKFold\n",
|
||||
"from sklearn.base import clone\n",
|
||||
"\n",
|
||||
"skfolds = StratifiedKFold(n_splits=3, random_state=42)\n",
|
||||
"skfolds = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)\n",
|
||||
"\n",
|
||||
"for train_index, test_index in skfolds.split(X_train, y_train_5):\n",
|
||||
" clone_clf = clone(sgd_clf)\n",
|
||||
|
@ -306,6 +313,13 @@
|
|||
" print(n_correct / len(y_pred))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Note**: `shuffle=True` was omitted by mistake in previous releases of the book."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
|
@ -330,6 +344,17 @@
|
|||
"cross_val_score(never_5_clf, X_train, y_train_5, cv=3, scoring=\"accuracy\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: this output (and many others in this notebook and other notebooks) may differ slightly from those in the book. Don't worry, that's okay! There are several reasons for this:\n",
|
||||
"* first, Scikit-Learn and other libraries evolve, and algorithms get tweaked a bit, which may change the exact result you get. If you use the latest Scikit-Learn version (and in general, you really should), you probably won't be using the exact same version I used when I wrote the book or this notebook, hence the difference. I try to keep this notebook reasonably up to date, but I can't change the numbers on the pages in your copy of the book.\n",
|
||||
"* second, many training algorithms are stochastic, meaning they rely on randomness. In principle, it's possible to get consistent outputs from a random number generator by setting the seed from which it generates the pseudo-random numbers (which is why you will see `random_state=42` or `np.random.seed(42)` pretty often). However, sometimes this does not suffice due to the other factors listed here.\n",
|
||||
"* third, if the training algorithm runs across multiple threads (as do some algorithms implemented in C) or across multiple processes (e.g., when using the `n_jobs` argument), then the precise order in which operations will run is not always guaranteed, and thus the exact result may vary slightly.\n",
|
||||
"* lastly, other things may prevent perfect reproducibility, such as Python dicts and sets whose order is not guaranteed to be stable across sessions, or the order of files in a directory which is also not guaranteed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
|
@ -379,7 +404,8 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"4096 / (4096 + 1522)"
|
||||
"cm = confusion_matrix(y_train_5, y_train_pred)\n",
|
||||
"cm[1, 1] / (cm[0, 1] + cm[1, 1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -397,7 +423,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"4096 / (4096 + 1325)"
|
||||
"cm[1, 1] / (cm[1, 0] + cm[1, 1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -417,7 +443,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"4096 / (4096 + (1522 + 1325) / 2)"
|
||||
"cm[1, 1] / (cm[1, 1] + (cm[1, 0] + cm[0, 1]) / 2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -536,9 +562,9 @@
|
|||
"\n",
|
||||
"plt.figure(figsize=(8, 6))\n",
|
||||
"plot_precision_vs_recall(precisions, recalls)\n",
|
||||
"plt.plot([0.4368, 0.4368], [0., 0.9], \"r:\")\n",
|
||||
"plt.plot([0.0, 0.4368], [0.9, 0.9], \"r:\")\n",
|
||||
"plt.plot([0.4368], [0.9], \"ro\")\n",
|
||||
"plt.plot([recall_90_precision, recall_90_precision], [0., 0.9], \"r:\")\n",
|
||||
"plt.plot([0.0, recall_90_precision], [0.9, 0.9], \"r:\")\n",
|
||||
"plt.plot([recall_90_precision], [0.9], \"ro\")\n",
|
||||
"save_fig(\"precision_vs_recall_plot\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
|
@ -622,9 +648,10 @@
|
|||
"\n",
|
||||
"plt.figure(figsize=(8, 6)) # Not shown\n",
|
||||
"plot_roc_curve(fpr, tpr)\n",
|
||||
"plt.plot([4.837e-3, 4.837e-3], [0., 0.4368], \"r:\") # Not shown\n",
|
||||
"plt.plot([0.0, 4.837e-3], [0.4368, 0.4368], \"r:\") # Not shown\n",
|
||||
"plt.plot([4.837e-3], [0.4368], \"ro\") # Not shown\n",
|
||||
"fpr_90 = fpr[np.argmax(tpr >= recall_90_precision)] # Not shown\n",
|
||||
"plt.plot([fpr_90, fpr_90], [0., recall_90_precision], \"r:\") # Not shown\n",
|
||||
"plt.plot([0.0, fpr_90], [recall_90_precision, recall_90_precision], \"r:\") # Not shown\n",
|
||||
"plt.plot([fpr_90], [recall_90_precision], \"ro\") # Not shown\n",
|
||||
"save_fig(\"roc_curve_plot\") # Not shown\n",
|
||||
"plt.show()"
|
||||
]
|
||||
|
@ -675,14 +702,16 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"recall_for_forest = tpr_forest[np.argmax(fpr_forest >= fpr_90)]\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(8, 6))\n",
|
||||
"plt.plot(fpr, tpr, \"b:\", linewidth=2, label=\"SGD\")\n",
|
||||
"plot_roc_curve(fpr_forest, tpr_forest, \"Random Forest\")\n",
|
||||
"plt.plot([4.837e-3, 4.837e-3], [0., 0.4368], \"r:\")\n",
|
||||
"plt.plot([0.0, 4.837e-3], [0.4368, 0.4368], \"r:\")\n",
|
||||
"plt.plot([4.837e-3], [0.4368], \"ro\")\n",
|
||||
"plt.plot([4.837e-3, 4.837e-3], [0., 0.9487], \"r:\")\n",
|
||||
"plt.plot([4.837e-3], [0.9487], \"ro\")\n",
|
||||
"plt.plot([fpr_90, fpr_90], [0., recall_90_precision], \"r:\")\n",
|
||||
"plt.plot([0.0, fpr_90], [recall_90_precision, recall_90_precision], \"r:\")\n",
|
||||
"plt.plot([fpr_90], [recall_90_precision], \"ro\")\n",
|
||||
"plt.plot([fpr_90, fpr_90], [0., recall_for_forest], \"r:\")\n",
|
||||
"plt.plot([fpr_90], [recall_for_forest], \"ro\")\n",
|
||||
"plt.grid(True)\n",
|
||||
"plt.legend(loc=\"lower right\", fontsize=16)\n",
|
||||
"save_fig(\"roc_curve_comparison_plot\")\n",
|
||||
|
@ -814,6 +843,13 @@
|
|||
"sgd_clf.decision_function([some_digit])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: the following two cells may take close to 30 minutes to run, or more depending on your hardware."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
|
@ -835,6 +871,13 @@
|
|||
"cross_val_score(sgd_clf, X_train_scaled, y_train, cv=3, scoring=\"accuracy\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Error analysis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
|
@ -1031,7 +1074,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.dummy import DummyClassifier\n",
|
||||
"dmy_clf = DummyClassifier()\n",
|
||||
"dmy_clf = DummyClassifier(strategy=\"prior\")\n",
|
||||
"y_probas_dmy = cross_val_predict(dmy_clf, X_train, y_train_5, cv=3, method=\"predict_proba\")\n",
|
||||
"y_scores_dmy = y_probas_dmy[:, 1]"
|
||||
]
|
||||
|
@ -1180,7 +1223,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: the next cell may take hours to run, depending on your hardware."
|
||||
"**Warning**: the next cell may take close to 16 hours to run, or more depending on your hardware."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1326,6 +1369,13 @@
|
|||
"knn_clf.fit(X_train_augmented, y_train_augmented)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: the following cell may take close to an hour to run, depending on your hardware."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 99,
|
||||
|
@ -1896,7 +1946,7 @@
|
|||
"source": [
|
||||
"import os\n",
|
||||
"import tarfile\n",
|
||||
"import urllib\n",
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"DOWNLOAD_ROOT = \"http://spamassassin.apache.org/old/publiccorpus/\"\n",
|
||||
"HAM_URL = DOWNLOAD_ROOT + \"20030228_easy_ham.tar.bz2\"\n",
|
||||
|
@ -2134,7 +2184,7 @@
|
|||
"import numpy as np\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"X = np.array(ham_emails + spam_emails)\n",
|
||||
"X = np.array(ham_emails + spam_emails, dtype=object)\n",
|
||||
"y = np.array([0] * len(ham_emails) + [1] * len(spam_emails))\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
|
||||
|
@ -2495,7 +2545,7 @@
|
|||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.model_selection import cross_val_score\n",
|
||||
"\n",
|
||||
"log_clf = LogisticRegression(solver=\"lbfgs\", random_state=42)\n",
|
||||
"log_clf = LogisticRegression(solver=\"lbfgs\", max_iter=1000, random_state=42)\n",
|
||||
"score = cross_val_score(log_clf, X_train_transformed, y_train, cv=3, verbose=3)\n",
|
||||
"score.mean()"
|
||||
]
|
||||
|
@ -2504,7 +2554,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Over 98.7%, not bad for a first try! :) However, remember that we are using the \"easy\" dataset. You can try with the harder datasets, the results won't be so amazing. You would have to try multiple models, select the best ones and fine-tune them using cross-validation, and so on.\n",
|
||||
"Over 98.5%, not bad for a first try! :) However, remember that we are using the \"easy\" dataset. You can try with the harder datasets, but the results won't be so amazing. You would have to try multiple models, select the best ones and fine-tune them using cross-validation, and so on.\n",
|
||||
"\n",
|
||||
"But you get the picture, so let's stop now, and just print out the precision/recall we get on the test set:"
|
||||
]
|
||||
|
@ -2519,7 +2569,7 @@
|
|||
"\n",
|
||||
"X_test_transformed = preprocess_pipeline.transform(X_test)\n",
|
||||
"\n",
|
||||
"log_clf = LogisticRegression(solver=\"lbfgs\", random_state=42)\n",
|
||||
"log_clf = LogisticRegression(solver=\"lbfgs\", max_iter=1000, random_state=42)\n",
|
||||
"log_clf.fit(X_train_transformed, y_train)\n",
|
||||
"\n",
|
||||
"y_pred = log_clf.predict(X_test_transformed)\n",
|
||||
|
@ -2552,7 +2602,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {},
|
||||
"toc": {
|
||||
|
|
|
@ -79,11 +79,7 @@
|
|||
" print(\"Saving figure\", fig_id)\n",
|
||||
" if tight_layout:\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.savefig(path, format=fig_extension, dpi=resolution)\n",
|
||||
"\n",
|
||||
"# Ignore useless warnings (see SciPy issue #5998)\n",
|
||||
"import warnings\n",
|
||||
"warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")"
|
||||
" plt.savefig(path, format=fig_extension, dpi=resolution)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1797,7 +1793,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.8"
|
||||
},
|
||||
"nav_menu": {},
|
||||
"toc": {
|
||||
|
|
|
@ -1109,7 +1109,7 @@
|
|||
" self.Js.append(J)\n",
|
||||
"\n",
|
||||
" w_gradient_vector = w - self.C * np.sum(X_t_sv, axis=0).reshape(-1, 1)\n",
|
||||
" b_derivative = -C * np.sum(t_sv)\n",
|
||||
" b_derivative = -self.C * np.sum(t_sv)\n",
|
||||
" \n",
|
||||
" w = w - self.eta(epoch) * w_gradient_vector\n",
|
||||
" b = b - self.eta(epoch) * b_derivative\n",
|
||||
|
@ -1381,6 +1381,13 @@
|
|||
"First, let's load the dataset and split it into a training set and a test set. We could use `train_test_split()` but people usually just take the first 60,000 instances for the training set, and the last 10,000 instances for the test set (this makes it possible to compare your model's performance with others): "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning:** since Scikit-Learn 0.24, `fetch_openml()` returns a Pandas `DataFrame` by default. To avoid this, we use `as_frame=False`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
|
@ -1388,7 +1395,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import fetch_openml\n",
|
||||
"mnist = fetch_openml('mnist_784', version=1, cache=True)\n",
|
||||
"mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)\n",
|
||||
"\n",
|
||||
"X = mnist[\"data\"]\n",
|
||||
"y = mnist[\"target\"].astype(np.uint8)\n",
|
||||
|
@ -1566,7 +1573,14 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This looks pretty low but remember we only trained the model on 1,000 instances. Let's retrain the best estimator on the whole training set (run this at night, it will take hours):"
|
||||
"This looks pretty low but remember we only trained the model on 1,000 instances. Let's retrain the best estimator on the whole training set:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: the following cell may take hours to run, depending on your hardware."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1830,7 +1844,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {},
|
||||
"toc": {
|
||||
|
|
|
@ -729,7 +729,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.8"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "309px",
|
||||
|
|
|
@ -181,6 +181,13 @@
|
|||
" print(clf.__class__.__name__, accuracy_score(y_test, y_pred))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Note**: the results in this notebook may differ slightly from the book, as Scikit-Learn algorithms sometimes get tweaked."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
@ -445,6 +452,13 @@
|
|||
"## Feature importance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning:** since Scikit-Learn 0.24, `fetch_openml()` returns a Pandas `DataFrame` by default. To avoid this and keep the same code as in the book, we use `as_frame=False`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
|
@ -453,7 +467,7 @@
|
|||
"source": [
|
||||
"from sklearn.datasets import fetch_openml\n",
|
||||
"\n",
|
||||
"mnist = fetch_openml('mnist_784', version=1)\n",
|
||||
"mnist = fetch_openml('mnist_784', version=1, as_frame=False)\n",
|
||||
"mnist.target = mnist.target.astype(np.uint8)"
|
||||
]
|
||||
},
|
||||
|
@ -535,21 +549,26 @@
|
|||
"\n",
|
||||
"fix, axes = plt.subplots(ncols=2, figsize=(10,4), sharey=True)\n",
|
||||
"for subplot, learning_rate in ((0, 1), (1, 0.5)):\n",
|
||||
" sample_weights = np.ones(m)\n",
|
||||
" sample_weights = np.ones(m) / m\n",
|
||||
" plt.sca(axes[subplot])\n",
|
||||
" for i in range(5):\n",
|
||||
" svm_clf = SVC(kernel=\"rbf\", C=0.05, gamma=\"scale\", random_state=42)\n",
|
||||
" svm_clf.fit(X_train, y_train, sample_weight=sample_weights)\n",
|
||||
" svm_clf = SVC(kernel=\"rbf\", C=0.2, gamma=0.6, random_state=42)\n",
|
||||
" svm_clf.fit(X_train, y_train, sample_weight=sample_weights * m)\n",
|
||||
" y_pred = svm_clf.predict(X_train)\n",
|
||||
" sample_weights[y_pred != y_train] *= (1 + learning_rate)\n",
|
||||
"\n",
|
||||
" r = sample_weights[y_pred != y_train].sum() / sample_weights.sum() # equation 7-1\n",
|
||||
" alpha = learning_rate * np.log((1 - r) / r) # equation 7-2\n",
|
||||
" sample_weights[y_pred != y_train] *= np.exp(alpha) # equation 7-3\n",
|
||||
" sample_weights /= sample_weights.sum() # normalization step\n",
|
||||
"\n",
|
||||
" plot_decision_boundary(svm_clf, X, y, alpha=0.2)\n",
|
||||
" plt.title(\"learning_rate = {}\".format(learning_rate), fontsize=16)\n",
|
||||
" if subplot == 0:\n",
|
||||
" plt.text(-0.7, -0.65, \"1\", fontsize=14)\n",
|
||||
" plt.text(-0.6, -0.10, \"2\", fontsize=14)\n",
|
||||
" plt.text(-0.5, 0.10, \"3\", fontsize=14)\n",
|
||||
" plt.text(-0.4, 0.55, \"4\", fontsize=14)\n",
|
||||
" plt.text(-0.3, 0.90, \"5\", fontsize=14)\n",
|
||||
" plt.text(-0.75, -0.95, \"1\", fontsize=14)\n",
|
||||
" plt.text(-1.05, -0.95, \"2\", fontsize=14)\n",
|
||||
" plt.text(1.0, -0.95, \"3\", fontsize=14)\n",
|
||||
" plt.text(-1.45, -0.5, \"4\", fontsize=14)\n",
|
||||
" plt.text(1.36, -0.95, \"5\", fontsize=14)\n",
|
||||
" else:\n",
|
||||
" plt.ylabel(\"\")\n",
|
||||
"\n",
|
||||
|
@ -557,15 +576,6 @@
|
|||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"list(m for m in dir(ada_clf) if not m.startswith(\"_\") and m.endswith(\"_\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
@ -575,7 +585,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -586,7 +596,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -598,7 +608,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -609,7 +619,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -620,7 +630,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -629,7 +639,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -638,7 +648,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -647,7 +657,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -663,7 +673,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -703,7 +713,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -715,7 +725,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -725,7 +735,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -755,7 +765,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -778,7 +788,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": 45,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -787,7 +797,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -816,7 +826,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"execution_count": 47,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -840,7 +850,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"execution_count": 48,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -849,7 +859,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -865,7 +875,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -878,7 +888,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -892,7 +902,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -906,7 +916,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"execution_count": 53,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -915,7 +925,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 54,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -966,7 +976,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -975,7 +985,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"execution_count": 56,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -994,7 +1004,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1005,19 +1015,19 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": 58,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
|
||||
"extra_trees_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)\n",
|
||||
"svm_clf = LinearSVC(random_state=42)\n",
|
||||
"svm_clf = LinearSVC(max_iter=100, tol=20, random_state=42)\n",
|
||||
"mlp_clf = MLPClassifier(random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1029,7 +1039,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1052,7 +1062,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1061,7 +1071,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"execution_count": 62,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1075,7 +1085,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"execution_count": 63,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1084,7 +1094,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 65,
|
||||
"execution_count": 64,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1093,7 +1103,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"execution_count": 65,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1102,7 +1112,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"execution_count": 66,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1118,7 +1128,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"execution_count": 67,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1134,7 +1144,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"execution_count": 68,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1150,7 +1160,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"execution_count": 69,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1166,7 +1176,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 71,
|
||||
"execution_count": 70,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1182,7 +1192,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"execution_count": 71,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1198,7 +1208,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"execution_count": 72,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1207,7 +1217,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1230,7 +1240,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 74,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1240,7 +1250,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"execution_count": 75,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1270,7 +1280,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1282,7 +1292,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"execution_count": 77,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1291,7 +1301,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 79,
|
||||
"execution_count": 78,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1301,7 +1311,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 80,
|
||||
"execution_count": 79,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1324,7 +1334,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"execution_count": 80,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1336,7 +1346,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"execution_count": 81,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1345,7 +1355,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1354,7 +1364,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"execution_count": 83,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1392,7 +1402,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "252px",
|
||||
|
|
|
@ -74,11 +74,7 @@
|
|||
" print(\"Saving figure\", fig_id)\n",
|
||||
" if tight_layout:\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.savefig(path, format=fig_extension, dpi=resolution)\n",
|
||||
"\n",
|
||||
"# Ignore useless warnings (see SciPy issue #5998)\n",
|
||||
"import warnings\n",
|
||||
"warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")"
|
||||
" plt.savefig(path, format=fig_extension, dpi=resolution)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -765,6 +761,13 @@
|
|||
"# MNIST compression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning:** since Scikit-Learn 0.24, `fetch_openml()` returns a Pandas `DataFrame` by default. To avoid this and keep the same code as in the book, we set `as_frame=True`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
|
@ -773,7 +776,7 @@
|
|||
"source": [
|
||||
"from sklearn.datasets import fetch_openml\n",
|
||||
"\n",
|
||||
"mnist = fetch_openml('mnist_784', version=1)\n",
|
||||
"mnist = fetch_openml('mnist_784', version=1, as_frame=False)\n",
|
||||
"mnist.target = mnist.target.astype(np.uint8)"
|
||||
]
|
||||
},
|
||||
|
@ -1105,15 +1108,15 @@
|
|||
"\n",
|
||||
"for n_components in (2, 10, 154):\n",
|
||||
" print(\"n_components =\", n_components)\n",
|
||||
" regular_pca = PCA(n_components=n_components)\n",
|
||||
" regular_pca = PCA(n_components=n_components, svd_solver=\"full\")\n",
|
||||
" inc_pca = IncrementalPCA(n_components=n_components, batch_size=500)\n",
|
||||
" rnd_pca = PCA(n_components=n_components, random_state=42, svd_solver=\"randomized\")\n",
|
||||
"\n",
|
||||
" for pca in (regular_pca, inc_pca, rnd_pca):\n",
|
||||
" for name, pca in ((\"PCA\", regular_pca), (\"Inc PCA\", inc_pca), (\"Rnd PCA\", rnd_pca)):\n",
|
||||
" t1 = time.time()\n",
|
||||
" pca.fit(X_train)\n",
|
||||
" t2 = time.time()\n",
|
||||
" print(\" {}: {:.1f} seconds\".format(pca.__class__.__name__, t2 - t1))"
|
||||
" print(\" {}: {:.1f} seconds\".format(name, t2 - t1))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1139,7 +1142,7 @@
|
|||
" pca.fit(X)\n",
|
||||
" t2 = time.time()\n",
|
||||
" times_rpca.append(t2 - t1)\n",
|
||||
" pca = PCA(n_components = 2)\n",
|
||||
" pca = PCA(n_components=2, svd_solver=\"full\")\n",
|
||||
" t1 = time.time()\n",
|
||||
" pca.fit(X)\n",
|
||||
" t2 = time.time()\n",
|
||||
|
@ -1178,7 +1181,7 @@
|
|||
" pca.fit(X)\n",
|
||||
" t2 = time.time()\n",
|
||||
" times_rpca.append(t2 - t1)\n",
|
||||
" pca = PCA(n_components = 2)\n",
|
||||
" pca = PCA(n_components=2, svd_solver=\"full\")\n",
|
||||
" t1 = time.time()\n",
|
||||
" pca.fit(X)\n",
|
||||
" t2 = time.time()\n",
|
||||
|
@ -1731,7 +1734,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Nice! Reducing dimensionality led to a 4× speedup. :) Let's check the model's accuracy:"
|
||||
"Nice! Reducing dimensionality led to over 2× speedup. :) Let's check the model's accuracy:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1748,7 +1751,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"A very slight drop in performance, which might be a reasonable price to pay for a 4× speedup, depending on the application."
|
||||
"A very slight drop in performance, which might be a reasonable price to pay for a 2× speedup, depending on the application."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2229,7 +2232,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Yes, PCA roughly gave us a 25% speedup, without damaging the result. We have a winner!"
|
||||
"Yes, PCA roughly gave us over 2x speedup, without damaging the result. We have a winner!"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2256,7 +2259,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -84,11 +84,7 @@
|
|||
" print(\"Saving figure\", fig_id)\n",
|
||||
" if tight_layout:\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.savefig(path, format=fig_extension, dpi=resolution)\n",
|
||||
"\n",
|
||||
"# Ignore useless warnings (see SciPy issue #5998)\n",
|
||||
"import warnings\n",
|
||||
"warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")"
|
||||
" plt.savefig(path, format=fig_extension, dpi=resolution)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -735,12 +731,20 @@
|
|||
"y_proba.round(2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: `model.predict_classes(X_new)` is deprecated. It is replaced with `np.argmax(model.predict(X_new), axis=-1)`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#y_pred = model.predict_classes(X_new) # deprecated\n",
|
||||
"y_pred = np.argmax(model.predict(X_new), axis=-1)\n",
|
||||
"y_pred"
|
||||
]
|
||||
|
@ -1505,7 +1509,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: the following cell crashes at the end of training. This seems to be caused by [Keras issue #13586](https://github.com/keras-team/keras/issues/13586), which was triggered by a recent change in Scikit-Learn. [Pull Request #13598](https://github.com/keras-team/keras/pull/13598) seems to fix the issue, so this problem should be resolved soon."
|
||||
"**Warning**: the following cell crashes at the end of training. This seems to be caused by [Keras issue #13586](https://github.com/keras-team/keras/issues/13586), which was triggered by a recent change in Scikit-Learn. [Pull Request #13598](https://github.com/keras-team/keras/pull/13598) seems to fix the issue, so this problem should be resolved soon. In the meantime, I've added `.tolist()` and `.rvs(1000).tolist()` as workarounds."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1519,8 +1523,8 @@
|
|||
"\n",
|
||||
"param_distribs = {\n",
|
||||
" \"n_hidden\": [0, 1, 2, 3],\n",
|
||||
" \"n_neurons\": np.arange(1, 100),\n",
|
||||
" \"learning_rate\": reciprocal(3e-4, 3e-2),\n",
|
||||
" \"n_neurons\": np.arange(1, 100) .tolist(),\n",
|
||||
" \"learning_rate\": reciprocal(3e-4, 3e-2) .rvs(1000).tolist(),\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=10, cv=3, verbose=2)\n",
|
||||
|
@ -1879,6 +1883,7 @@
|
|||
"plt.gca().set_xscale('log')\n",
|
||||
"plt.hlines(min(expon_lr.losses), min(expon_lr.rates), max(expon_lr.rates))\n",
|
||||
"plt.axis([min(expon_lr.rates), max(expon_lr.rates), 0, expon_lr.losses[0]])\n",
|
||||
"plt.grid()\n",
|
||||
"plt.xlabel(\"Learning rate\")\n",
|
||||
"plt.ylabel(\"Loss\")"
|
||||
]
|
||||
|
@ -1887,7 +1892,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The loss starts shooting back up violently around 3e-1, so let's try using 2e-1 as our learning rate:"
|
||||
"The loss starts shooting back up violently when the learning rate goes over 6e-1, so let's try using half of that, at 3e-1:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1922,7 +1927,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=keras.optimizers.SGD(lr=2e-1),\n",
|
||||
" optimizer=keras.optimizers.SGD(lr=3e-1),\n",
|
||||
" metrics=[\"accuracy\"])"
|
||||
]
|
||||
},
|
||||
|
@ -1949,7 +1954,7 @@
|
|||
"\n",
|
||||
"history = model.fit(X_train, y_train, epochs=100,\n",
|
||||
" validation_data=(X_valid, y_valid),\n",
|
||||
" callbacks=[early_stopping_cb, checkpoint_cb, tensorboard_cb])"
|
||||
" callbacks=[checkpoint_cb, early_stopping_cb, tensorboard_cb])"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2002,7 +2007,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "264px",
|
||||
|
|
|
@ -673,7 +673,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bn1.updates"
|
||||
"#bn1.updates #deprecated"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -953,7 +953,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model.summary()"
|
||||
"model_B.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1039,7 +1039,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Great! We got quite a bit of transfer: the error rate dropped by a factor of 4!"
|
||||
"Great! We got quite a bit of transfer: the error rate dropped by a factor of 4.5!"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1048,7 +1048,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"(100 - 96.95) / (100 - 99.25)"
|
||||
"(100 - 97.05) / (100 - 99.35)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2274,7 +2274,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The model with the lowest validation loss gets about 47% accuracy on the validation set. It took 39 epochs to reach the lowest validation loss, with roughly 10 seconds per epoch on my laptop (without a GPU). Let's see if we can improve performance using Batch Normalization."
|
||||
"The model with the lowest validation loss gets about 47.6% accuracy on the validation set. It took 27 epochs to reach the lowest validation loss, with roughly 8 seconds per epoch on my laptop (without a GPU). Let's see if we can improve performance using Batch Normalization."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2339,9 +2339,9 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* *Is the model converging faster than before?* Much faster! The previous model took 39 epochs to reach the lowest validation loss, while the new model with BN took 18 epochs. That's more than twice as fast as the previous model. The BN layers stabilized training and allowed us to use a much larger learning rate, so convergence was faster.\n",
|
||||
"* *Does BN produce a better model?* Yes! The final model is also much better, with 55% accuracy instead of 47%. It's still not a very good model, but at least it's much better than before (a Convolutional Neural Network would do much better, but that's a different topic, see chapter 14).\n",
|
||||
"* *How does BN affect training speed?* Although the model converged twice as fast, each epoch took about 16s instead of 10s, because of the extra computations required by the BN layers. So overall, although the number of epochs was reduced by 50%, the training time (wall time) was shortened by 30%. Which is still pretty significant!"
|
||||
"* *Is the model converging faster than before?* Much faster! The previous model took 27 epochs to reach the lowest validation loss, while the new model achieved that same loss in just 5 epochs and continued to make progress until the 16th epoch. The BN layers stabilized training and allowed us to use a much larger learning rate, so convergence was faster.\n",
|
||||
"* *Does BN produce a better model?* Yes! The final model is also much better, with 54.0% accuracy instead of 47.6%. It's still not a very good model, but at least it's much better than before (a Convolutional Neural Network would do much better, but that's a different topic, see chapter 14).\n",
|
||||
"* *How does BN affect training speed?* Although the model converged much faster, each epoch took about 12s instead of 8s, because of the extra computations required by the BN layers. But overall the training time (wall time) was shortened significantly!"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2412,7 +2412,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We get 51.4% accuracy, which is better than the original model, but not quite as good as the model using batch normalization. Moreover, it took 13 epochs to reach the best model, which is much faster than both the original model and the BN model, plus each epoch took only 10 seconds, just like the original model. So it's by far the fastest model to train (both in terms of epochs and wall time)."
|
||||
"We get 47.9% accuracy, which is not much better than the original model (47.6%), and not as good as the model using batch normalization (54.0%). However, convergence was almost as fast as with the BN model, plus each epoch took only 7 seconds. So it's by far the fastest model to train so far."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2473,7 +2473,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The model reaches 50.8% accuracy on the validation set. That's very slightly worse than without dropout (51.4%). With an extensive hyperparameter search, it might be possible to do better (I tried dropout rates of 5%, 10%, 20% and 40%, and learning rates 1e-4, 3e-4, 5e-4, and 1e-3), but probably not much better in this case."
|
||||
"The model reaches 48.9% accuracy on the validation set. That's very slightly better than without dropout (47.6%). With an extensive hyperparameter search, it might be possible to do better (I tried dropout rates of 5%, 10%, 20% and 40%, and learning rates 1e-4, 3e-4, 5e-4, and 1e-3), but probably not much better in this case."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2561,7 +2561,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We only get virtually no accuracy improvement in this case (from 50.8% to 50.9%).\n",
|
||||
"We get no accuracy improvement in this case (we're still at 48.9% accuracy).\n",
|
||||
"\n",
|
||||
"So the best model we got in this exercise is the Batch Normalization model."
|
||||
]
|
||||
|
@ -2655,7 +2655,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"One cycle allowed us to train the model in just 15 epochs, each taking only 3 seconds (thanks to the larger batch size). This is over 3 times faster than the fastest model we trained so far. Moreover, we improved the model's performance (from 50.8% to 52.8%). The batch normalized model reaches a slightly better performance, but it's much slower to train."
|
||||
"One cycle allowed us to train the model in just 15 epochs, each taking only 2 seconds (thanks to the larger batch size). This is several times faster than the fastest model we trained so far. Moreover, we improved the model's performance (from 47.6% to 52.0%). The batch normalized model reaches a slightly better performance (54%), but it's much slower to train."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2682,7 +2682,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "360px",
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1026,7 +1026,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: there's currently a bug preventing `from tensorflow.train import X` so we work around it by writing `X = tf.train.X`. See https://github.com/tensorflow/tensorflow/issues/33289 for more details."
|
||||
"**Warning**: in TensorFlow 2.0 and 2.1, there was a bug preventing `from tensorflow.train import X` so we work around it by writing `X = tf.train.X`. See https://github.com/tensorflow/tensorflow/issues/33289 for more details."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1294,7 +1294,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: there's currently a bug preventing `from tensorflow.train import X` so we work around it by writing `X = tf.train.X`. See https://github.com/tensorflow/tensorflow/issues/33289 for more details."
|
||||
"**Warning**: in TensorFlow 2.0 and 2.1, there was a bug preventing `from tensorflow.train import X` so we work around it by writing `X = tf.train.X`. See https://github.com/tensorflow/tensorflow/issues/33289 for more details."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1430,7 +1430,7 @@
|
|||
"source": [
|
||||
"import os\n",
|
||||
"import tarfile\n",
|
||||
"import urllib\n",
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"DOWNLOAD_ROOT = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
|
||||
"HOUSING_PATH = os.path.join(\"datasets\", \"housing\")\n",
|
||||
|
@ -2040,8 +2040,8 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"train_set = mnist_dataset(train_filepaths, shuffle_buffer_size=60000)\n",
|
||||
"valid_set = mnist_dataset(train_filepaths)\n",
|
||||
"test_set = mnist_dataset(train_filepaths)"
|
||||
"valid_set = mnist_dataset(valid_filepaths)\n",
|
||||
"test_set = mnist_dataset(test_filepaths)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2120,7 +2120,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 162,
|
||||
"execution_count": 130,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -2267,14 +2267,14 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It takes about 20 seconds to load the dataset and go through it 10 times."
|
||||
"It takes about 17 seconds to load the dataset and go through it 10 times."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"But let's pretend the dataset does not fit in memory, just to make things more interesting. Luckily, each review fits on just one line (they use `<br />` to indicate line breaks), so we can read the reviews using a `TextLineDataset`. If they didn't we would have to preprocess the input files (e.g., converting them to TFRecords). For very large datasets, it would make sense a tool like Apache Beam for that."
|
||||
"But let's pretend the dataset does not fit in memory, just to make things more interesting. Luckily, each review fits on just one line (they use `<br />` to indicate line breaks), so we can read the reviews using a `TextLineDataset`. If they didn't we would have to preprocess the input files (e.g., converting them to TFRecords). For very large datasets, it would make sense to use a tool like Apache Beam for that."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2306,7 +2306,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now it takes about 34 seconds to go through the dataset 10 times. That's much slower, essentially because the dataset is not cached in RAM, so it must be reloaded at each epoch. If you add `.cache()` just before `.repeat(10)`, you will see that this implementation will be about as fast as the previous one."
|
||||
"Now it takes about 33 seconds to go through the dataset 10 times. That's much slower, essentially because the dataset is not cached in RAM, so it must be reloaded at each epoch. If you add `.cache()` just before `.repeat(10)`, you will see that this implementation will be about as fast as the previous one."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2473,7 +2473,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's run it on the same `X_example`, just to make sure the word IDs are larger now, since the vocabulary bigger:"
|
||||
"Let's run it on the same `X_example`, just to make sure the word IDs are larger now, since the vocabulary is bigger:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2540,7 +2540,7 @@
|
|||
"source": [
|
||||
"class BagOfWords(keras.layers.Layer):\n",
|
||||
" def __init__(self, n_tokens, dtype=tf.int32, **kwargs):\n",
|
||||
" super().__init__(dtype=tf.int32, **kwargs)\n",
|
||||
" super().__init__(dtype=dtype, **kwargs)\n",
|
||||
" self.n_tokens = n_tokens\n",
|
||||
" def call(self, inputs):\n",
|
||||
" one_hot = tf.one_hot(inputs, self.n_tokens)\n",
|
||||
|
@ -2609,7 +2609,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We get about 75% accuracy on the validation set after just the first epoch, but after that the model makes no progress. We will do better in Chapter 16. For now the point is just to perform efficient preprocessing using `tf.data` and Keras preprocessing layers."
|
||||
"We get about 73.7% accuracy on the validation set after just the first epoch, but after that the model makes no significant progress. We will do better in Chapter 16. For now the point is just to perform efficient preprocessing using `tf.data` and Keras preprocessing layers."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2766,7 +2766,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "264px",
|
||||
|
|
|
@ -1366,7 +1366,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {},
|
||||
"toc": {
|
||||
|
|
|
@ -1857,6 +1857,13 @@
|
|||
"Now let's write a function that will generate a new chorale. We will give it a few seed chords, it will convert them to arpegios (the format expected by the model), and use the model to predict the next note, then the next, and so on. In the end, it will group the notes 4 by 4 to create chords again, and return the resulting chorale."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: `model.predict_classes(X)` is deprecated. It is replaced with `np.argmax(model.predict(X), axis=-1)`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 94,
|
||||
|
@ -1868,7 +1875,8 @@
|
|||
" arpegio = tf.reshape(arpegio, [1, -1])\n",
|
||||
" for chord in range(length):\n",
|
||||
" for note in range(4):\n",
|
||||
" next_note = model.predict_classes(arpegio)[:1, -1:]\n",
|
||||
" #next_note = model.predict_classes(arpegio)[:1, -1:]\n",
|
||||
" next_note = np.argmax(model.predict(arpegio), axis=-1)[:1, -1:]\n",
|
||||
" arpegio = tf.concat([arpegio, next_note], axis=1)\n",
|
||||
" arpegio = tf.where(arpegio == 0, arpegio, arpegio + min_note - 1)\n",
|
||||
" return tf.reshape(arpegio, shape=[-1, 4])"
|
||||
|
@ -2010,7 +2018,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {},
|
||||
"toc": {
|
||||
|
|
|
@ -57,6 +57,7 @@
|
|||
" # %tensorflow_version only exists in Colab.\n",
|
||||
" %tensorflow_version 2.x\n",
|
||||
" !pip install -q -U tensorflow-addons\n",
|
||||
" !pip install -q -U transformers\n",
|
||||
" IS_COLAB = True\n",
|
||||
"except Exception:\n",
|
||||
" IS_COLAB = False\n",
|
||||
|
@ -309,6 +310,20 @@
|
|||
"## Creating and Training the Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: the following code may take up to 24 hours to run, depending on your hardware. If you use a GPU, it may take just 1 or 2 hours, or less."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Note**: the `GRU` class will only use the GPU (if you have one) when using the default values for the following arguments: `activation`, `recurrent_activation`, `recurrent_dropout`, `unroll`, `use_bias` and `reset_after`. This is why I commented out `recurrent_dropout=0.2` (compared to the book)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
|
@ -317,9 +332,11 @@
|
|||
"source": [
|
||||
"model = keras.models.Sequential([\n",
|
||||
" keras.layers.GRU(128, return_sequences=True, input_shape=[None, max_id],\n",
|
||||
" dropout=0.2, recurrent_dropout=0.2),\n",
|
||||
" #dropout=0.2, recurrent_dropout=0.2),\n",
|
||||
" dropout=0.2),\n",
|
||||
" keras.layers.GRU(128, return_sequences=True,\n",
|
||||
" dropout=0.2, recurrent_dropout=0.2),\n",
|
||||
" #dropout=0.2, recurrent_dropout=0.2),\n",
|
||||
" dropout=0.2),\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(max_id,\n",
|
||||
" activation=\"softmax\"))\n",
|
||||
"])\n",
|
||||
|
@ -346,6 +363,13 @@
|
|||
" return tf.one_hot(X, max_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: the `predict_classes()` method is deprecated. Instead, we must use `np.argmax(model.predict(X_new), axis=-1)`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
|
@ -353,7 +377,8 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"X_new = preprocess([\"How are yo\"])\n",
|
||||
"Y_pred = model.predict_classes(X_new)\n",
|
||||
"#Y_pred = model.predict_classes(X_new)\n",
|
||||
"Y_pred = np.argmax(model.predict(X_new), axis=-1)\n",
|
||||
"tokenizer.sequences_to_texts(Y_pred + 1)[0][-1] # 1st sentence, last char"
|
||||
]
|
||||
},
|
||||
|
@ -614,7 +639,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"(X_train, y_test), (X_valid, y_test) = keras.datasets.imdb.load_data()"
|
||||
"(X_train, y_train), (X_test, y_test) = keras.datasets.imdb.load_data()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1383,7 +1408,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"def string_to_ids(s, chars=POSSIBLE_CHARS):\n",
|
||||
" return [POSSIBLE_CHARS.index(c) for c in s]"
|
||||
" return [chars.index(c) for c in s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1452,7 +1477,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"What classes does it belong to?"
|
||||
"What class does it belong to?"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1599,7 +1624,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"INPUT_CHARS = \"\".join(sorted(set(\"\".join(MONTHS)))) + \"01234567890, \"\n",
|
||||
"INPUT_CHARS = \"\".join(sorted(set(\"\".join(MONTHS) + \"0123456789, \")))\n",
|
||||
"INPUT_CHARS"
|
||||
]
|
||||
},
|
||||
|
@ -1785,7 +1810,8 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ids = model.predict_classes(X_new)\n",
|
||||
"#ids = model.predict_classes(X_new)\n",
|
||||
"ids = np.argmax(model.predict(X_new), axis=-1)\n",
|
||||
"for date_str in ids_to_date_strs(ids):\n",
|
||||
" print(date_str)"
|
||||
]
|
||||
|
@ -1819,7 +1845,8 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ids = model.predict_classes(X_new)\n",
|
||||
"#ids = model.predict_classes(X_new)\n",
|
||||
"ids = np.argmax(model.predict(X_new), axis=-1)\n",
|
||||
"for date_str in ids_to_date_strs(ids):\n",
|
||||
" print(date_str)"
|
||||
]
|
||||
|
@ -1847,7 +1874,8 @@
|
|||
"\n",
|
||||
"def convert_date_strs(date_strs):\n",
|
||||
" X = prepare_date_strs_padded(date_strs)\n",
|
||||
" ids = model.predict_classes(X)\n",
|
||||
" #ids = model.predict_classes(X)\n",
|
||||
" ids = np.argmax(model.predict(X), axis=-1)\n",
|
||||
" return ids_to_date_strs(ids)"
|
||||
]
|
||||
},
|
||||
|
@ -2063,7 +2091,7 @@
|
|||
" len(INPUT_CHARS) + 1, encoder_embedding_size)(encoder_inputs)\n",
|
||||
"\n",
|
||||
"decoder_embedding_layer = keras.layers.Embedding(\n",
|
||||
" len(INPUT_CHARS) + 2, decoder_embedding_size)\n",
|
||||
" len(OUTPUT_CHARS) + 2, decoder_embedding_size)\n",
|
||||
"decoder_embeddings = decoder_embedding_layer(decoder_inputs)\n",
|
||||
"\n",
|
||||
"encoder = keras.layers.LSTM(units, return_state=True)\n",
|
||||
|
@ -2226,7 +2254,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: due to a TF bug, this version only works using TensorFlow 2.2."
|
||||
"**Warning**: due to a TF bug, this version only works using TensorFlow 2.2 or above."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2260,7 +2288,7 @@
|
|||
" len(INPUT_CHARS) + 1, encoder_embedding_size)(encoder_inputs)\n",
|
||||
"\n",
|
||||
"decoder_embedding_layer = keras.layers.Embedding(\n",
|
||||
" len(INPUT_CHARS) + 2, decoder_embedding_size)\n",
|
||||
" len(OUTPUT_CHARS) + 2, decoder_embedding_size)\n",
|
||||
"decoder_embeddings = decoder_embedding_layer(decoder_inputs)\n",
|
||||
"\n",
|
||||
"encoder = keras.layers.LSTM(units, return_state=True)\n",
|
||||
|
@ -2711,7 +2739,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {},
|
||||
"toc": {
|
||||
|
|
|
@ -1603,6 +1603,203 @@
|
|||
" plt.imshow(image, cmap=\"binary\")\n",
|
||||
" plt.axis(\"off\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Exercise Solutions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. to 8.\n",
|
||||
"\n",
|
||||
"See Appendix A."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 9.\n",
|
||||
"_Exercise: Try using a denoising autoencoder to pretrain an image classifier. You can use MNIST (the simplest option), or a more complex image dataset such as [CIFAR10](https://homl.info/122) if you want a bigger challenge. Regardless of the dataset you're using, follow these steps:_\n",
|
||||
"* Split the dataset into a training set and a test set. Train a deep denoising autoencoder on the full training set.\n",
|
||||
"* Check that the images are fairly well reconstructed. Visualize the images that most activate each neuron in the coding layer.\n",
|
||||
"* Build a classification DNN, reusing the lower layers of the autoencoder. Train it using only 500 images from the training set. Does it perform better with or without pretraining?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"[X_train, y_train], [X_test, y_test] = keras.datasets.cifar10.load_data()\n",
|
||||
"X_train = X_train / 255\n",
|
||||
"X_test = X_test / 255"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tf.random.set_seed(42)\n",
|
||||
"np.random.seed(42)\n",
|
||||
"\n",
|
||||
"denoising_encoder = keras.models.Sequential([\n",
|
||||
" keras.layers.GaussianNoise(0.1, input_shape=[32, 32, 3]),\n",
|
||||
" keras.layers.Conv2D(32, kernel_size=3, padding=\"same\", activation=\"relu\"),\n",
|
||||
" keras.layers.MaxPool2D(),\n",
|
||||
" keras.layers.Flatten(),\n",
|
||||
" keras.layers.Dense(512, activation=\"relu\"),\n",
|
||||
"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 79,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"denoising_encoder.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 80,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"denoising_decoder = keras.models.Sequential([\n",
|
||||
" keras.layers.Dense(16 * 16 * 32, activation=\"relu\", input_shape=[512]),\n",
|
||||
" keras.layers.Reshape([16, 16, 32]),\n",
|
||||
" keras.layers.Conv2DTranspose(filters=3, kernel_size=3, strides=2,\n",
|
||||
" padding=\"same\", activation=\"sigmoid\")\n",
|
||||
"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"denoising_decoder.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"denoising_ae = keras.models.Sequential([denoising_encoder, denoising_decoder])\n",
|
||||
"denoising_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.Nadam(),\n",
|
||||
" metrics=[\"mse\"])\n",
|
||||
"history = denoising_ae.fit(X_train, X_train, epochs=10,\n",
|
||||
" validation_data=(X_test, X_test))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n_images = 5\n",
|
||||
"new_images = X_test[:n_images]\n",
|
||||
"new_images_noisy = new_images + np.random.randn(n_images, 32, 32, 3) * 0.1\n",
|
||||
"new_images_denoised = denoising_ae.predict(new_images_noisy)\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(6, n_images * 2))\n",
|
||||
"for index in range(n_images):\n",
|
||||
" plt.subplot(n_images, 3, index * 3 + 1)\n",
|
||||
" plt.imshow(new_images[index])\n",
|
||||
" plt.axis('off')\n",
|
||||
" if index == 0:\n",
|
||||
" plt.title(\"Original\")\n",
|
||||
" plt.subplot(n_images, 3, index * 3 + 2)\n",
|
||||
" plt.imshow(np.clip(new_images_noisy[index], 0., 1.))\n",
|
||||
" plt.axis('off')\n",
|
||||
" if index == 0:\n",
|
||||
" plt.title(\"Noisy\")\n",
|
||||
" plt.subplot(n_images, 3, index * 3 + 3)\n",
|
||||
" plt.imshow(new_images_denoised[index])\n",
|
||||
" plt.axis('off')\n",
|
||||
" if index == 0:\n",
|
||||
" plt.title(\"Denoised\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 10.\n",
|
||||
"_Exercise: Train a variational autoencoder on the image dataset of your choice, and use it to generate images. Alternatively, you can try to find an unlabeled dataset that you are interested in and see if you can generate new samples._\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 11.\n",
|
||||
"_Exercise: Train a DCGAN to tackle the image dataset of your choice, and use it to generate images. Add experience replay and see if this helps. Turn it into a conditional GAN where you can control the generated class._\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
@ -1621,7 +1818,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "381px",
|
||||
|
|
|
@ -565,7 +565,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's create a neural network that will take observations as inputs, and output the action to take for each observation. To choose an action, the network will estimate a probability for each action, then we will select an action randomly according to the estimated probabilities. In the case of the Cart-Pole environment, there are just two possible actions (left or right), so we only need one output neuron: it will output the probability `p` of the action 0 (left), and of course the probability of action 1 (right) will be `1 - p`."
|
||||
"Let's create a neural network that will take observations as inputs, and output the probabilities of actions to take for each observation. To choose an action, the network will estimate a probability for each action, then we will select an action randomly according to the estimated probabilities. In the case of the Cart-Pole environment, there are just two possible actions (left or right), so we only need one output neuron: it will output the probability `p` of the action 0 (left), and of course the probability of action 1 (right) will be `1 - p`."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -639,7 +639,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1306,7 +1306,7 @@
|
|||
"source": [
|
||||
"def epsilon_greedy_policy(state, epsilon=0):\n",
|
||||
" if np.random.rand() < epsilon:\n",
|
||||
" return np.random.randint(2)\n",
|
||||
" return np.random.randint(n_outputs)\n",
|
||||
" else:\n",
|
||||
" Q_values = model.predict(state[np.newaxis])\n",
|
||||
" return np.argmax(Q_values[0])"
|
||||
|
@ -1316,7 +1316,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will also need a replay memory. It will contain the agent's experiences, in the form of tuples: `(obs, action, reward, next_obs, done)`. We can use the `deque` class for that:"
|
||||
"We will also need a replay memory. It will contain the agent's experiences, in the form of tuples: `(obs, action, reward, next_obs, done)`. We can use the `deque` class for that (but make sure to check out DeepMind's excellent [Reverb library](https://github.com/deepmind/reverb) for a much more robust implementation of experience replay):"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2187,13 +2187,9 @@
|
|||
"source": [
|
||||
"from tf_agents.agents.dqn.dqn_agent import DqnAgent\n",
|
||||
"\n",
|
||||
"# see TF-agents issue #113\n",
|
||||
"#optimizer = keras.optimizers.RMSprop(lr=2.5e-4, rho=0.95, momentum=0.0,\n",
|
||||
"# epsilon=0.00001, centered=True)\n",
|
||||
"\n",
|
||||
"train_step = tf.Variable(0)\n",
|
||||
"update_period = 4 # run a training step every 4 collect steps\n",
|
||||
"optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=2.5e-4, decay=0.95, momentum=0.0,\n",
|
||||
"optimizer = keras.optimizers.RMSprop(lr=2.5e-4, rho=0.95, momentum=0.0,\n",
|
||||
" epsilon=0.00001, centered=True)\n",
|
||||
"epsilon_fn = keras.optimizers.schedules.PolynomialDecay(\n",
|
||||
" initial_learning_rate=1.0, # initial ε\n",
|
||||
|
@ -2215,7 +2211,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create the replay buffer:"
|
||||
"Create the replay buffer (this will use a lot of RAM, so please reduce the buffer size if you get an out-of-memory error):"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2229,7 +2225,7 @@
|
|||
"replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(\n",
|
||||
" data_spec=agent.collect_data_spec,\n",
|
||||
" batch_size=tf_env.batch_size,\n",
|
||||
" max_length=1000000)\n",
|
||||
" max_length=1000000) # reduce if OOM error\n",
|
||||
"\n",
|
||||
"replay_buffer_observer = replay_buffer.add_batch"
|
||||
]
|
||||
|
@ -2356,16 +2352,28 @@
|
|||
"Let's sample 2 sub-episodes, with 3 time steps each and display them:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Note**: `replay_buffer.get_next()` is deprecated. We must use `replay_buffer.as_dataset(..., single_deterministic_pass=False)` instead."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 109,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tf.random.set_seed(888) # chosen to show an example of trajectory at the end of an episode\n",
|
||||
"tf.random.set_seed(93) # chosen to show an example of trajectory at the end of an episode\n",
|
||||
"\n",
|
||||
"trajectories, buffer_info = replay_buffer.get_next(\n",
|
||||
" sample_batch_size=2, num_steps=3)"
|
||||
"#trajectories, buffer_info = replay_buffer.get_next(\n",
|
||||
"# sample_batch_size=2, num_steps=3)\n",
|
||||
"\n",
|
||||
"trajectories, buffer_info = next(iter(replay_buffer.as_dataset(\n",
|
||||
" sample_batch_size=2,\n",
|
||||
" num_steps=3,\n",
|
||||
" single_deterministic_pass=False)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2790,7 +2798,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -286,12 +286,12 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"np.round([[1.1739199e-04, 1.1239604e-07, 6.0210604e-04, 2.0804715e-03, 2.5779348e-06,\n",
|
||||
" 6.4079795e-05, 2.7411186e-08, 9.9669880e-01, 3.9654213e-05, 3.9471846e-04],\n",
|
||||
" [1.2294615e-03, 2.9207937e-05, 9.8599273e-01, 9.6755642e-03, 8.8930705e-08,\n",
|
||||
" 2.9156188e-04, 1.5831805e-03, 1.1311053e-09, 1.1980456e-03, 1.1113169e-07],\n",
|
||||
" [6.4066830e-05, 9.6359509e-01, 9.0598064e-03, 2.9872139e-03, 5.9552520e-04,\n",
|
||||
" 3.7478798e-03, 2.5074568e-03, 1.1462728e-02, 5.5553433e-03, 4.2495009e-04]], 2)"
|
||||
"np.round([[1.1347984e-04, 1.5187356e-07, 9.7032893e-04, 2.7640699e-03, 3.7826971e-06,\n",
|
||||
" 7.6876910e-05, 3.9140293e-08, 9.9559116e-01, 5.3502394e-05, 4.2665208e-04],\n",
|
||||
" [8.2443521e-04, 3.5493889e-05, 9.8826385e-01, 7.0466995e-03, 1.2957400e-07,\n",
|
||||
" 2.3389691e-04, 2.5639210e-03, 9.5886099e-10, 1.0314899e-03, 8.7952529e-08],\n",
|
||||
" [4.4693781e-05, 9.7028232e-01, 9.0526715e-03, 2.2641101e-03, 4.8766597e-04,\n",
|
||||
" 2.8800720e-03, 2.2714981e-03, 8.3753867e-03, 4.0439744e-03, 2.9759688e-04]], 2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -682,13 +682,21 @@
|
|||
"# Using GPUs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Note**: `tf.test.is_gpu_available()` is deprecated. Instead, please use `tf.config.list_physical_devices('GPU')`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tf.test.is_gpu_available()"
|
||||
"#tf.test.is_gpu_available() # deprecated\n",
|
||||
"tf.config.list_physical_devices('GPU')"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -799,7 +807,12 @@
|
|||
"# Use the central storage strategy instead:\n",
|
||||
"#distribution = tf.distribute.experimental.CentralStorageStrategy()\n",
|
||||
"\n",
|
||||
"#resolver = tf.distribute.cluster_resolver.TPUClusterResolver()\n",
|
||||
"#if IS_COLAB and \"COLAB_TPU_ADDR\" in os.environ:\n",
|
||||
"# tpu_address = \"grpc://\" + os.environ[\"COLAB_TPU_ADDR\"]\n",
|
||||
"#else:\n",
|
||||
"# tpu_address = \"\"\n",
|
||||
"#resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu_address)\n",
|
||||
"#tf.config.experimental_connect_to_cluster(resolver)\n",
|
||||
"#tf.tpu.experimental.initialize_tpu_system(resolver)\n",
|
||||
"#distribution = tf.distribute.experimental.TPUStrategy(resolver)\n",
|
||||
"\n",
|
||||
|
@ -886,17 +899,6 @@
|
|||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"batch_size = 100 # must be divisible by the number of workers\n",
|
||||
"model.fit(X_train, y_train, epochs=10,\n",
|
||||
" validation_data=(X_valid, y_valid), batch_size=batch_size)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
@ -910,24 +912,37 @@
|
|||
"source": [
|
||||
"A TensorFlow cluster is a group of TensorFlow processes running in parallel, usually on different machines, and talking to each other to complete some work, for example training or executing a neural network. Each TF process in the cluster is called a \"task\" (or a \"TF server\"). It has an IP address, a port, and a type (also called its role or its job). The type can be `\"worker\"`, `\"chief\"`, `\"ps\"` (parameter server) or `\"evaluator\"`:\n",
|
||||
"* Each **worker** performs computations, usually on a machine with one or more GPUs.\n",
|
||||
"* The **chief** performs computations as well, but it also handles extra work such as writing TensorBoard logs or saving checkpoints. There is a single chief in a cluster. If no chief is specified, then the first worker is the chief.\n",
|
||||
"* The **chief** performs computations as well, but it also handles extra work such as writing TensorBoard logs or saving checkpoints. There is a single chief in a cluster, typically the first worker (i.e., worker #0).\n",
|
||||
"* A **parameter server** (ps) only keeps track of variable values, it is usually on a CPU-only machine.\n",
|
||||
"* The **evaluator** obviously takes care of evaluation. There is usually a single evaluator in a cluster.\n",
|
||||
"\n",
|
||||
"The set of tasks that share the same type is often called a \"job\". For example, the \"worker\" job is the set of all workers.\n",
|
||||
"\n",
|
||||
"To start a TensorFlow cluster, you must first specify it. This means defining all the tasks (IP address, TCP port, and type). For example, the following cluster specification defines a cluster with 3 tasks (2 workers and 1 parameter server). It's a dictionary with one key per job, and the values are lists of task addresses:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"worker\": [\"my-worker0.example.com:9876\", \"my-worker1.example.com:9876\"],\n",
|
||||
" \"ps\": [\"my-ps0.example.com:9876\"]\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"To start a TensorFlow cluster, you must first define it. This means specifying all the tasks (IP address, TCP port, and type). For example, the following cluster specification defines a cluster with 3 tasks (2 workers and 1 parameter server). It's a dictionary with one key per job, and the values are lists of task addresses:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cluster_spec = {\n",
|
||||
" \"worker\": [\n",
|
||||
" \"machine-a.example.com:2222\", # /job:worker/task:0\n",
|
||||
" \"machine-b.example.com:2222\" # /job:worker/task:1\n",
|
||||
" ],\n",
|
||||
" \"ps\": [\"machine-c.example.com:2222\"] # /job:ps/task:0\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Every task in the cluster may communicate with every other task in the server, so make sure to configure your firewall to authorize all communications between these machines on these ports (it's usually simpler if you use the same port on every machine).\n",
|
||||
"\n",
|
||||
"When a task is started, it needs to be told which one it is: its type and index (the task index is also called the task id). A common way to specify everything at once (both the cluster spec and the current task's type and id) is to set the `TF_CONFIG` environment variable before starting the program. It must be a JSON-encoded dictionary containing a cluster specification (under the `\"cluster\"` key), and the type and index of the task to start (under the `\"task\"` key). For example, the following `TF_CONFIG` environment variable defines a simple cluster with 2 workers and 1 parameter server, and specifies that the task to start is the first worker:"
|
||||
"When a task is started, it needs to be told which one it is: its type and index (the task index is also called the task id). A common way to specify everything at once (both the cluster spec and the current task's type and id) is to set the `TF_CONFIG` environment variable before starting the program. It must be a JSON-encoded dictionary containing a cluster specification (under the `\"cluster\"` key), and the type and index of the task to start (under the `\"task\"` key). For example, the following `TF_CONFIG` environment variable defines the same cluster as above, with 2 workers and 1 parameter server, and specifies that the task to start is worker #1:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -940,13 +955,10 @@
|
|||
"import json\n",
|
||||
"\n",
|
||||
"os.environ[\"TF_CONFIG\"] = json.dumps({\n",
|
||||
" \"cluster\": {\n",
|
||||
" \"worker\": [\"my-work0.example.com:9876\", \"my-work1.example.com:9876\"],\n",
|
||||
" \"ps\": [\"my-ps0.example.com:9876\"]\n",
|
||||
" },\n",
|
||||
" \"task\": {\"type\": \"worker\", \"index\": 0}\n",
|
||||
" \"cluster\": cluster_spec,\n",
|
||||
" \"task\": {\"type\": \"worker\", \"index\": 1}\n",
|
||||
"})\n",
|
||||
"print(\"TF_CONFIG='{}'\".format(os.environ[\"TF_CONFIG\"]))"
|
||||
"os.environ[\"TF_CONFIG\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -960,7 +972,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then you would write a short Python script to start a task. The same script can be used on every machine, since it will load the `TF_CONFIG` variable, which will tell it which task to start:"
|
||||
"TensorFlow's `TFConfigClusterResolver` class reads the cluster configuration from this environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -972,16 +984,7 @@
|
|||
"import tensorflow as tf\n",
|
||||
"\n",
|
||||
"resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver()\n",
|
||||
"worker0 = tf.distribute.Server(resolver.cluster_spec(),\n",
|
||||
" job_name=resolver.task_type,\n",
|
||||
" task_index=resolver.task_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Another way to specify the cluster specification is directly in Python, rather than through an environment variable:"
|
||||
"resolver.cluster_spec()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -990,17 +993,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cluster_spec = tf.train.ClusterSpec({\n",
|
||||
" \"worker\": [\"127.0.0.1:9901\", \"127.0.0.1:9902\"],\n",
|
||||
" \"ps\": [\"127.0.0.1:9903\"]\n",
|
||||
"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can then start a server simply by passing it the cluster spec and indicating its type and index. Let's start the two remaining tasks (remember that in general you would only start a single task per machine; we are starting 3 tasks on the localhost just for the purpose of this code example):"
|
||||
"resolver.task_type"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1009,8 +1002,18 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#worker1 = tf.distribute.Server(cluster_spec, job_name=\"worker\", task_index=1)\n",
|
||||
"ps0 = tf.distribute.Server(cluster_spec, job_name=\"ps\", task_index=0)"
|
||||
"resolver.task_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's run a simpler cluster with just two worker tasks, both running on the local machine. We will use the `MultiWorkerMirroredStrategy` to train a model across these two tasks.\n",
|
||||
"\n",
|
||||
"The first step is to write the training code. As this code will be used to run both workers, each in its own process, we write this code to a separate Python file, `my_mnist_multiworker_task.py`. The code is relatively straightforward, but there are a couple of important things to note:\n",
|
||||
"* We create the `MultiWorkerMirroredStrategy` before doing anything else with TensorFlow.\n",
|
||||
"* Only one of the workers will take care of logging to TensorBoard and saving checkpoints. As mentioned earlier, this worker is called the *chief*, and by convention it is usually worker #0."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1019,70 +1022,41 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"TF_CONFIG\"] = json.dumps({\n",
|
||||
" \"cluster\": {\n",
|
||||
" \"worker\": [\"127.0.0.1:9901\", \"127.0.0.1:9902\"],\n",
|
||||
" \"ps\": [\"127.0.0.1:9903\"]\n",
|
||||
" },\n",
|
||||
" \"task\": {\"type\": \"worker\", \"index\": 1}\n",
|
||||
"})\n",
|
||||
"print(repr(os.environ[\"TF_CONFIG\"]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"distribution = tf.distribute.experimental.MultiWorkerMirroredStrategy()\n",
|
||||
"%%writefile my_mnist_multiworker_task.py\n",
|
||||
"\n",
|
||||
"keras.backend.clear_session()\n",
|
||||
"tf.random.set_seed(42)\n",
|
||||
"np.random.seed(42)\n",
|
||||
"\n",
|
||||
"os.environ[\"TF_CONFIG\"] = json.dumps({\n",
|
||||
" \"cluster\": {\n",
|
||||
" \"worker\": [\"127.0.0.1:9901\", \"127.0.0.1:9902\"],\n",
|
||||
" \"ps\": [\"127.0.0.1:9903\"]\n",
|
||||
" },\n",
|
||||
" \"task\": {\"type\": \"worker\", \"index\": 1}\n",
|
||||
"})\n",
|
||||
"#CUDA_VISIBLE_DEVICES=0 \n",
|
||||
"\n",
|
||||
"with distribution.scope():\n",
|
||||
" model = create_model()\n",
|
||||
" model.compile(loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=keras.optimizers.SGD(lr=1e-2),\n",
|
||||
" metrics=[\"accuracy\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import numpy as np\n",
|
||||
"import tensorflow as tf\n",
|
||||
"from tensorflow import keras\n",
|
||||
"import numpy as np\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"# At the beginning of the program (restart the kernel before running this cell)\n",
|
||||
"distribution = tf.distribute.experimental.MultiWorkerMirroredStrategy()\n",
|
||||
"# At the beginning of the program\n",
|
||||
"distribution = tf.distribute.MultiWorkerMirroredStrategy()\n",
|
||||
"\n",
|
||||
"resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver()\n",
|
||||
"print(\"Starting task {}{}\".format(resolver.task_type, resolver.task_id))\n",
|
||||
"\n",
|
||||
"# Only worker #0 will write checkpoints and log to TensorBoard\n",
|
||||
"if resolver.task_id == 0:\n",
|
||||
" root_logdir = os.path.join(os.curdir, \"my_mnist_multiworker_logs\")\n",
|
||||
" run_id = time.strftime(\"run_%Y_%m_%d-%H_%M_%S\")\n",
|
||||
" run_dir = os.path.join(root_logdir, run_id)\n",
|
||||
" callbacks = [\n",
|
||||
" keras.callbacks.TensorBoard(run_dir),\n",
|
||||
" keras.callbacks.ModelCheckpoint(\"my_mnist_multiworker_model.h5\",\n",
|
||||
" save_best_only=True),\n",
|
||||
" ]\n",
|
||||
"else:\n",
|
||||
" callbacks = []\n",
|
||||
"\n",
|
||||
"# Load and prepare the MNIST dataset\n",
|
||||
"(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.mnist.load_data()\n",
|
||||
"X_train_full = X_train_full[..., np.newaxis] / 255.\n",
|
||||
"X_test = X_test[..., np.newaxis] / 255.\n",
|
||||
"X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n",
|
||||
"y_valid, y_train = y_train_full[:5000], y_train_full[5000:]\n",
|
||||
"X_new = X_test[:3]\n",
|
||||
"\n",
|
||||
"n_workers = 2\n",
|
||||
"batch_size = 32 * n_workers\n",
|
||||
"dataset = tf.data.Dataset.from_tensor_slices((X_train[..., np.newaxis], y_train)).repeat().batch(batch_size)\n",
|
||||
" \n",
|
||||
"def create_model():\n",
|
||||
" return keras.models.Sequential([\n",
|
||||
"with distribution.scope():\n",
|
||||
" model = keras.models.Sequential([\n",
|
||||
" keras.layers.Conv2D(filters=64, kernel_size=7, activation=\"relu\",\n",
|
||||
" padding=\"same\", input_shape=[28, 28, 1]),\n",
|
||||
" keras.layers.MaxPooling2D(pool_size=2),\n",
|
||||
|
@ -1096,14 +1070,62 @@
|
|||
" keras.layers.Dropout(0.5),\n",
|
||||
" keras.layers.Dense(units=10, activation='softmax'),\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
"with distribution.scope():\n",
|
||||
" model = create_model()\n",
|
||||
" model.compile(loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=keras.optimizers.SGD(lr=1e-2),\n",
|
||||
" metrics=[\"accuracy\"])\n",
|
||||
"\n",
|
||||
"model.fit(dataset, steps_per_epoch=len(X_train)//batch_size, epochs=10)"
|
||||
"model.fit(X_train, y_train, validation_data=(X_valid, y_valid),\n",
|
||||
" epochs=10, callbacks=callbacks)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In a real-world application, there would typically be a single worker per machine, but in this example we're running both workers on the same machine, so they will both try to use all the available GPU RAM (if this machine has a GPU), and this will likely lead to an Out-Of-Memory (OOM) error. To avoid this, we could use the `CUDA_VISIBLE_DEVICES` environment variable to assign a different GPU to each worker. Alternatively, we can simply disable GPU support, like this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"-1\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We are now ready to start both workers, each in its own process, using Python's `subprocess` module. Before we start each process, we need to set the `TF_CONFIG` environment variable appropriately, changing only the task index:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import subprocess\n",
|
||||
"\n",
|
||||
"cluster_spec = {\"worker\": [\"127.0.0.1:9901\", \"127.0.0.1:9902\"]}\n",
|
||||
"\n",
|
||||
"for index, worker_address in enumerate(cluster_spec[\"worker\"]):\n",
|
||||
" os.environ[\"TF_CONFIG\"] = json.dumps({\n",
|
||||
" \"cluster\": cluster_spec,\n",
|
||||
" \"task\": {\"type\": \"worker\", \"index\": index}\n",
|
||||
" })\n",
|
||||
" subprocess.Popen(\"python my_mnist_multiworker_task.py\", shell=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"That's it! Our TensorFlow cluster is now running, but we can't see it in this notebook because it's running in separate processes (but if you are running this notebook in Jupyter, you can see the worker logs in Jupyter's server logs).\n",
|
||||
"\n",
|
||||
"Since the chief (worker #0) is writing to TensorBoard, we use TensorBoard to view the training progress. Run the following cell, then click on the settings button (i.e., the gear icon) in the TensorBoard interface and check the \"Reload data\" box to make TensorBoard automatically refresh every 30s. Once the first epoch of training is finished (which may take a few minutes), and once TensorBoard refreshes, the SCALARS tab will appear. Click on this tab to view the progress of the model's training and validation accuracy."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1112,12 +1134,15 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Hyperparameter tuning\n",
|
||||
"\n",
|
||||
"# Only talk to ps server\n",
|
||||
"config_proto = tf.ConfigProto(device_filters=['/job:ps', '/job:worker/task:%d' % tf_config['task']['index']])\n",
|
||||
"config = tf.estimator.RunConfig(session_config=config_proto)\n",
|
||||
"# default since 1.10"
|
||||
"%load_ext tensorboard\n",
|
||||
"%tensorboard --logdir=./my_mnist_multiworker_logs --port=6006"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"That's it! Once training is over, the best checkpoint of the model will be available in the `my_mnist_multiworker_model.h5` file. You can load it using `keras.models.load_model()` and use it for predictions, as usual:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1126,7 +1151,18 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"strategy.num_replicas_in_sync"
|
||||
"from tensorflow import keras\n",
|
||||
"\n",
|
||||
"model = keras.models.load_model(\"my_mnist_multiworker_model.h5\")\n",
|
||||
"Y_pred = model.predict(X_new)\n",
|
||||
"np.argmax(Y_pred, axis=-1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And that's all for today! Hope you found this useful. 😊"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -1146,7 +1182,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
12
INSTALL.md
12
INSTALL.md
|
@ -24,25 +24,15 @@ Once Anaconda or miniconda is installed, then run the following command to updat
|
|||
## Install the GPU Driver and Libraries
|
||||
If you have a TensorFlow-compatible GPU card (NVidia card with Compute Capability ≥ 3.5), and you want TensorFlow to use it, then you should download the latest driver for your card from [nvidia.com](https://www.nvidia.com/Download/index.aspx?lang=en-us) and install it. You will also need NVidia's CUDA and cuDNN libraries, but the good news is that they will be installed automatically when you install the tensorflow-gpu package from Anaconda. However, if you don't use Anaconda, you will have to install them manually. If you hit any roadblock, see TensorFlow's [GPU installation instructions](https://tensorflow.org/install/gpu) for more details.
|
||||
|
||||
If you want to use a GPU then you should also edit environment.yml (or environment-windows.yml if you're on Windows), located at the root of the handson-ml2 project, replace tensorflow=2.0.0 with tensorflow-gpu=2.0.0, and replace tensorflow-serving-api==2.0.0 with tensorflow-serving-api-gpu==2.0.0. This will not be needed anymore when TensorFlow 2.1 is released.
|
||||
|
||||
## Create the tf2 Environment
|
||||
Next, make sure you're in the handson-ml2 directory and run the following command. It will create a new `conda` environment containing every library you will need to run all the notebooks (by default, the environment will be named `tf2`, but you can choose another name using the `-n` option):
|
||||
|
||||
$ conda env create -f environment.yml # or environment-windows.yml on Windows
|
||||
$ conda env create -f environment.yml
|
||||
|
||||
Next, activate the new environment:
|
||||
|
||||
$ conda activate tf2
|
||||
|
||||
## Windows
|
||||
If you're on Windows, and you want to go through chapter 18 on Reinforcement Learning, then you will also need to run the following command. It installs a Windows-compatible fork of the atari-py library.
|
||||
|
||||
$ pip install --no-index -f https://github.com/Kojoley/atari-py/releases atari_py
|
||||
|
||||
|
||||
> **Warning**: TensorFlow Transform (used in chapter 13) and TensorFlow-AddOns (used in chapter 16) are not yet available on Windows, but the TensorFlow team is working on it.
|
||||
|
||||
|
||||
## Start Jupyter
|
||||
You're almost there! You just need to register the `tf2` conda environment to Jupyter. The notebooks in this project will default to the environment named `python3`, so it's best to register this environment using the name `python3` (if you prefer to use another name, you will have to select it in the "Kernel > Change kernel..." menu in Jupyter every time you open a notebook):
|
||||
|
|
10
README.md
10
README.md
|
@ -38,25 +38,19 @@ Read the [Docker instructions](https://github.com/ageron/handson-ml2/tree/master
|
|||
|
||||
### Want to install this project on your own machine?
|
||||
|
||||
Start by installing [Anaconda](https://www.anaconda.com/distribution/) (or [Miniconda](https://docs.conda.io/en/latest/miniconda.html)), [git](https://git-scm.com/downloads), and if you have a TensorFlow-compatible GPU, install the [GPU driver](https://www.nvidia.com/Download/index.aspx).
|
||||
Start by installing [Anaconda](https://www.anaconda.com/distribution/) (or [Miniconda](https://docs.conda.io/en/latest/miniconda.html)), [git](https://git-scm.com/downloads), and if you have a TensorFlow-compatible GPU, install the [GPU driver](https://www.nvidia.com/Download/index.aspx), as well as the appropriate version of CUDA and cuDNN (see TensorFlow's documentation for more details).
|
||||
|
||||
Next, clone this project by opening a terminal and typing the following commands (do not type the first `$` signs on each line, they just indicate that these are terminal commands):
|
||||
|
||||
$ git clone https://github.com/ageron/handson-ml2.git
|
||||
$ cd handson-ml2
|
||||
|
||||
If you want to use a GPU, then edit `environment.yml` (or `environment-windows.yml` on Windows) and replace `tensorflow=2.0.0` with `tensorflow-gpu=2.0.0`. Also replace `tensorflow-serving-api==2.0.0` with `tensorflow-serving-api-gpu==2.0.0`.
|
||||
|
||||
Next, run the following commands:
|
||||
|
||||
$ conda env create -f environment.yml # or environment-windows.yml on Windows
|
||||
$ conda env create -f environment.yml
|
||||
$ conda activate tf2
|
||||
$ python -m ipykernel install --user --name=python3
|
||||
|
||||
Then if you're on Windows, run the following command:
|
||||
|
||||
$ pip install --no-index -f https://github.com/Kojoley/atari-py/releases atari_py
|
||||
|
||||
Finally, start Jupyter:
|
||||
|
||||
$ jupyter notebook
|
||||
|
|
|
@ -1,56 +0,0 @@
|
|||
name: tf2
|
||||
channels:
|
||||
- conda-forge
|
||||
- defaults
|
||||
dependencies:
|
||||
- graphviz
|
||||
- imageio=2.6.1
|
||||
- ipython=7.10.1
|
||||
- ipywidgets=7.5.1
|
||||
- joblib=0.14.0
|
||||
- jupyter=1.0.0
|
||||
- matplotlib=3.1.2
|
||||
- nbdime=1.1.0
|
||||
- nltk=3.4.5
|
||||
- numexpr=2.7.0
|
||||
- numpy=1.17.3
|
||||
- pandas=0.25.3
|
||||
- pillow=6.2.1
|
||||
- pip
|
||||
- py-xgboost=0.90
|
||||
- pydot=1.4.1
|
||||
- pyopengl=3.1.3b2
|
||||
- python=3.7
|
||||
- python-graphviz
|
||||
- requests=2.22.0
|
||||
- scikit-image=0.16.2
|
||||
- scikit-learn=0.22
|
||||
- scipy=1.3.1
|
||||
- tqdm=4.40.0
|
||||
- wheel
|
||||
- widgetsnbextension=3.5.1
|
||||
- pip:
|
||||
#- atari-py==0.2.6 # NOT ON WINDOWS YET
|
||||
- ftfy==5.7
|
||||
- gym==0.15.4
|
||||
- opencv-python==4.1.2.30
|
||||
- psutil==5.6.7
|
||||
- pyglet==1.3.2
|
||||
- spacy==2.2.4
|
||||
- tensorboard==2.1.1
|
||||
#- tensorflow-addons==0.8.3 # NOT ON WINDOWS YET
|
||||
#- tensorflow-data-validation==0.21.5 # NOT ON WINDOWS YET
|
||||
- tensorflow-datasets==2.1.0
|
||||
- tensorflow-estimator==2.1.0
|
||||
- tensorflow-hub==0.7.0
|
||||
#- tensorflow-metadata==0.21.1 # NOT ON WINDOWS YET
|
||||
#- tensorflow-model-analysis==0.21.6 # NOT ON WINDOWS YET
|
||||
- tensorflow-probability==0.9.0
|
||||
- tensorflow-serving-api==2.1.0 # or tensorflow-serving-api-gpu if gpu
|
||||
#- tensorflow-transform==0.21.2 # NOT ON WINDOWS YET
|
||||
- tensorflow==2.1.0 # or tensorflow-gpu if gpu
|
||||
- tf-agents==0.3.0
|
||||
#- tfx==0.21.2 # NOT ON WINDOWS YET
|
||||
- transformers==2.8.0
|
||||
- urlextract==0.13.0
|
||||
#- pyvirtualdisplay # add if on headless server
|
|
@ -3,55 +3,45 @@ channels:
|
|||
- conda-forge
|
||||
- defaults
|
||||
dependencies:
|
||||
- graphviz
|
||||
- imageio=2.6
|
||||
- ipython=7.12
|
||||
- ipywidgets=7.5
|
||||
- joblib=0.14
|
||||
- jupyter=1.0
|
||||
- matplotlib=3.1
|
||||
- nbdime=2.0
|
||||
- nltk=3.4
|
||||
- numexpr=2.7
|
||||
- numpy=1.18
|
||||
- pandas=1.0
|
||||
- pillow=7.0
|
||||
- pip
|
||||
- psutil=5.7
|
||||
- py-xgboost=0.90
|
||||
- pydot=1.4
|
||||
- pyglet=1.5
|
||||
- pyopengl=3.1
|
||||
- python=3.7
|
||||
- python-graphviz
|
||||
#- pyvirtualdisplay=0.2 # add if on headless server
|
||||
- requests=2.22
|
||||
- scikit-image=0.16
|
||||
- scikit-learn=0.22
|
||||
- scipy=1.4
|
||||
- tqdm=4.43
|
||||
- wheel
|
||||
- widgetsnbextension=3.5
|
||||
- atari_py=0.2 # used only in chapter 18
|
||||
- ftfy=5.8 # used only in chapter 16 by the transformers library
|
||||
- graphviz # used only in chapter 6 for dot files
|
||||
- gym=0.18 # used only in chapter 18
|
||||
- ipython=7.20 # a powerful Python shell
|
||||
- ipywidgets=7.6 # optionally used only in chapter 12 for tqdm in Jupyter
|
||||
- joblib=0.14 # used only in chapter 2 to save/load Scikit-Learn models
|
||||
- jupyter=1.0 # to edit and run Jupyter notebooks
|
||||
- matplotlib=3.3 # beautiful plots. See tutorial tools_matplotlib.ipynb
|
||||
- nbdime=2.1 # optional tool to diff Jupyter notebooks
|
||||
- nltk=3.4 # optionally used in chapter 3, exercise 4
|
||||
- numexpr=2.7 # used only in the Pandas tutorial for numerical expressions
|
||||
- numpy=1.19 # Powerful n-dimensional arrays and numerical computing tools
|
||||
- opencv=4.5 # used only in chapter 18 by TF Agents for image preprocessing
|
||||
- pandas=1.2 # data analysis and manipulation tool
|
||||
- pillow=8.1 # image manipulation library, (used by matplotlib.image.imread)
|
||||
- pip # Python's package-management system
|
||||
- py-xgboost=0.90 # used only in chapter 7 for optimized Gradient Boosting
|
||||
- pyglet=1.5 # used only in chapter 18 to render environments
|
||||
- pyopengl=3.1 # used only in chapter 18 to render environments
|
||||
- python=3.7 # Python! Not using latest version as some libs lack support
|
||||
- python-graphviz # used only in chapter 6 for dot files
|
||||
#- pyvirtualdisplay=1.3 # used only in chapter 18 if on headless server
|
||||
- requests=2.25 # used only in chapter 19 for REST API queries
|
||||
- scikit-learn=0.24 # machine learning library
|
||||
- scipy=1.6 # scientific/technical computing library
|
||||
- tqdm=4.56 # a progress bar library
|
||||
- transformers=4.3 # Natural Language Processing lib for TF or PyTorch
|
||||
- wheel # built-package format for pip
|
||||
- widgetsnbextension=3.5 # interactive HTML widgets for Jupyter notebooks
|
||||
- pip:
|
||||
- atari-py==0.2.6
|
||||
- ftfy==5.7
|
||||
- gast==0.2.2
|
||||
- gym==0.17.1
|
||||
- opencv-python==4.2.0.32
|
||||
- spacy==2.2.4
|
||||
- tensorboard==2.1.1
|
||||
- tensorflow-addons==0.8.3
|
||||
- tensorflow-data-validation==0.21.5
|
||||
- tensorflow-datasets==2.1.0
|
||||
- tensorflow-estimator==2.1.0
|
||||
- tensorflow-hub==0.7.0
|
||||
- tensorflow-metadata==0.21.1
|
||||
- tensorflow-model-analysis==0.21.6
|
||||
- tensorflow-probability==0.9.0
|
||||
- tensorflow-serving-api==2.1.0 # or tensorflow-serving-api-gpu if gpu
|
||||
- tensorflow-transform==0.21.2
|
||||
- tensorflow==2.1.0 # or tensorflow-gpu if gpu
|
||||
- tf-agents==0.3.0
|
||||
- tfx==0.21.2
|
||||
- transformers==2.8.0
|
||||
- urlextract==0.14.0
|
||||
- tensorboard-plugin-profile==2.4.0 # profiling plugin for TensorBoard
|
||||
- tensorboard==2.4.1 # TensorFlow's visualization toolkit
|
||||
- tensorflow-addons==0.12.1 # used only in chapter 16 for a seq2seq impl.
|
||||
- tensorflow-datasets==3.0.0 # datasets repository, ready to use
|
||||
- tensorflow-hub==0.9.0 # trained ML models repository, ready to use
|
||||
- tensorflow-probability==0.12.1 # Optional. Probability/Stats lib.
|
||||
- tensorflow-serving-api==2.4.1 # or tensorflow-serving-api-gpu if gpu
|
||||
- tensorflow==2.4.1 # Deep Learning library
|
||||
- tf-agents==0.7.1 # Reinforcement Learning lib based on TensorFlow
|
||||
- tfx==0.27.0 # platform to deploy production ML pipelines
|
||||
- urlextract==1.2.0 # optionally used in chapter 3, exercise 4
|
||||
|
|
|
@ -14,6 +14,17 @@
|
|||
"_This notebook contains toy implementations of various autodiff techniques, to explain how they work._"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<table align=\"left\">\n",
|
||||
" <td>\n",
|
||||
" <a target=\"_blank\" href=\"https://colab.research.google.com/github/ageron/handson-ml2/blob/master/extra_autodiff.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
|
||||
" </td>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
@ -898,7 +909,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "603px",
|
||||
|
|
|
@ -14,6 +14,17 @@
|
|||
"This notebook displays an animation comparing Batch, Mini-Batch and Stochastic Gradient Descent (introduced in Chapter 4). Thanks to [Daniel Ingram](https://github.com/daniel-s-ingram) who contributed this notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<table align=\"left\">\n",
|
||||
" <td>\n",
|
||||
" <a target=\"_blank\" href=\"https://colab.research.google.com/github/ageron/handson-ml2/blob/master/extra_gradient_descent_comparison.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
|
||||
" </td>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
|
@ -257,7 +268,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
82
index.ipynb
82
index.ipynb
|
@ -10,6 +10,17 @@
|
|||
"\n",
|
||||
"[Prerequisites](#Prerequisites) (see below)\n",
|
||||
"\n",
|
||||
"<table align=\"left\">\n",
|
||||
" <td>\n",
|
||||
" <a target=\"_blank\" href=\"https://colab.research.google.com/github/ageron/handson-ml2/blob/master/index.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
|
||||
" </td>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Notebooks\n",
|
||||
"1. [The Machine Learning landscape](01_the_machine_learning_landscape.ipynb)\n",
|
||||
"2. [End-to-end Machine Learning project](02_end_to_end_machine_learning_project.ipynb)\n",
|
||||
|
@ -29,34 +40,65 @@
|
|||
"16. [Natural Language Processing with RNNs and Attention](16_nlp_with_rnns_and_attention.ipynb)\n",
|
||||
"17. [Representation Learning Using Autoencoders](17_autoencoders.ipynb)\n",
|
||||
"18. [Reinforcement Learning](18_reinforcement_learning.ipynb)\n",
|
||||
"19. [Training and Deploying TensorFlow Models at Scale](19_training_and_deploying_at_scale.ipynb)\n",
|
||||
"\n",
|
||||
"## Scientific Python tutorials\n",
|
||||
"* [NumPy](tools_numpy.ipynb)\n",
|
||||
"* [Matplotlib](tools_matplotlib.ipynb)\n",
|
||||
"* [Pandas](tools_pandas.ipynb)\n",
|
||||
"\n",
|
||||
"## Math Tutorials\n",
|
||||
"* [Linear Algebra](math_linear_algebra.ipynb)\n",
|
||||
"* [Differential Calculus](math_differential_calculus.ipynb)\n",
|
||||
"\n",
|
||||
"## Extra Material\n",
|
||||
"* [Auto-differentiation](extra_autodiff.ipynb)\n",
|
||||
"\n",
|
||||
"## Misc.\n",
|
||||
"* [Equations](book_equations.pdf) (list of equations in the book)\n"
|
||||
"19. [Training and Deploying TensorFlow Models at Scale](19_training_and_deploying_at_scale.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Scientific Python tutorials\n",
|
||||
"* [NumPy](tools_numpy.ipynb)\n",
|
||||
"* [Matplotlib](tools_matplotlib.ipynb)\n",
|
||||
"* [Pandas](tools_pandas.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Math Tutorials\n",
|
||||
"* [Linear Algebra](math_linear_algebra.ipynb)\n",
|
||||
"* [Differential Calculus](math_differential_calculus.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Extra Material\n",
|
||||
"* [Auto-differentiation](extra_autodiff.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Misc.\n",
|
||||
"* [Equations](book_equations.pdf) (list of equations in the book)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"### To understand\n",
|
||||
"* **Python** – you don't need to be an expert Python programmer, but you do need to know the basics. If you don't, the official [Python tutorial](https://docs.python.org/3/tutorial/) is a good place to start.\n",
|
||||
"* **Scientific Python** – We will be using a few popular Python libraries, in particular NumPy, matplotlib and pandas. If you are not familiar with these libraries, you should probably start by going through the tutorials in the Tools section (especially NumPy).\n",
|
||||
"* **Math** – We will also use some notions of Linear Algebra, Calculus, Statistics and Probability theory. You should be able to follow along if you learned these in the past as it won't be very advanced, but if you don't know about these topics or you need a refresher then go through the appropriate introduction in the Math section.\n",
|
||||
"\n",
|
||||
"* **Math** – We will also use some notions of Linear Algebra, Calculus, Statistics and Probability theory. You should be able to follow along if you learned these in the past as it won't be very advanced, but if you don't know about these topics or you need a refresher then go through the appropriate introduction in the Math section."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### To run the examples\n",
|
||||
"* **Jupyter** – These notebooks are based on Jupyter. You can run these notebooks in just one click using a hosted platform such as Binder, Deepnote or Colaboratory (no installation required), or you can just view them using Jupyter.org's viewer, or you can install everything on your machine, as you prefer. Check out the [home page](https://github.com/ageron/handson-ml2/) for more details."
|
||||
]
|
||||
|
@ -85,7 +127,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"nav_menu": {},
|
||||
"toc": {
|
||||
|
|
|
@ -544,7 +544,7 @@
|
|||
"id": "Zu6u_8bw7ZUc"
|
||||
},
|
||||
"source": [
|
||||
"A word about notations: there are several other notations for the derivative that you will find in the litterature:\n",
|
||||
"A word about notations: there are several other notations for the derivative that you will find in the literature:\n",
|
||||
"\n",
|
||||
"$f'(x) = \\dfrac{\\mathrm{d}f(x)}{\\mathrm{d}x} = \\dfrac{\\mathrm{d}}{\\mathrm{d}x}f(x)$\n",
|
||||
"\n",
|
||||
|
@ -1780,7 +1780,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.6.1"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
|
|
|
@ -11,6 +11,17 @@
|
|||
"*Machine Learning relies heavily on Linear Algebra, so it is essential to understand what vectors and matrices are, what operations you can perform with them, and how they can be useful.*"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<table align=\"left\">\n",
|
||||
" <td>\n",
|
||||
" <a target=\"_blank\" href=\"https://colab.research.google.com/github/ageron/handson-ml2/blob/master/math_linear_algebra.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
|
||||
" </td>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
@ -1347,7 +1358,7 @@
|
|||
"source": [
|
||||
"Looks good! You can check the other elements until you get used to the algorithm.\n",
|
||||
"\n",
|
||||
"We multiplied a $2 \\times 3$ matrix by a $3 \\times 4$ matrix, so the result is a $2 \\times 4$ matrix. The first matrix's number of columns has to be equal to the second matrix's number of rows. If we try to multiple $D$ by $A$, we get an error because D has 4 columns while A has 2 rows:"
|
||||
"We multiplied a $2 \\times 3$ matrix by a $3 \\times 4$ matrix, so the result is a $2 \\times 4$ matrix. The first matrix's number of columns has to be equal to the second matrix's number of rows. If we try to multiply $D$ by $A$, we get an error because D has 4 columns while A has 2 rows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -3063,7 +3074,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"toc": {
|
||||
"toc_cell": false,
|
||||
|
|
|
@ -2,70 +2,63 @@
|
|||
# on Windows or when using a GPU. Please see the installation
|
||||
# instructions in INSTALL.md
|
||||
|
||||
|
||||
##### Core scientific packages
|
||||
jupyter==1.0.0
|
||||
matplotlib==3.1.3
|
||||
numpy==1.18.1
|
||||
pandas==1.0.3
|
||||
scipy==1.4.1
|
||||
|
||||
matplotlib==3.3.4
|
||||
numpy==1.19.5
|
||||
pandas==1.2.2
|
||||
scipy==1.6.0
|
||||
|
||||
##### Machine Learning packages
|
||||
scikit-learn==0.22
|
||||
scikit-learn==0.24.1
|
||||
|
||||
# Optional: the XGBoost library is only used in chapter 7
|
||||
xgboost==1.0.2
|
||||
xgboost==1.3.3
|
||||
|
||||
# Optional: the transformers library is only used in chapter 16
|
||||
transformers==2.8.0
|
||||
transformers==4.3.2
|
||||
|
||||
##### TensorFlow-related packages
|
||||
|
||||
# If you have a TF-compatible GPU and you want to enable GPU support, then
|
||||
# replace tensorflow with tensorflow-gpu, and replace tensorflow-serving-api
|
||||
# with tensorflow-serving-api-gpu.
|
||||
# replace tensorflow-serving-api with tensorflow-serving-api-gpu.
|
||||
# Your GPU must have CUDA Compute Capability 3.5 or higher support, and
|
||||
# you must install CUDA, cuDNN and more: see tensorflow.org for the detailed
|
||||
# installation instructions.
|
||||
|
||||
tensorflow==2.1.0
|
||||
|
||||
tensorflow==2.4.1
|
||||
# Optional: the TF Serving API library is just needed for chapter 19.
|
||||
tensorflow-serving-api==2.1.0
|
||||
#tensorflow-serving-api-gpu==2.1.0
|
||||
tensorflow-serving-api==2.4.1 # or tensorflow-serving-api-gpu if gpu
|
||||
|
||||
tensorboard==2.1.1
|
||||
tensorboard-plugin-profile==2.2.0
|
||||
tensorflow-datasets==2.1.0
|
||||
tensorflow-hub==0.7.0
|
||||
tensorflow-probability==0.9.0
|
||||
tensorboard==2.4.1
|
||||
tensorboard-plugin-profile==2.4.0
|
||||
tensorflow-datasets==3.0.0
|
||||
tensorflow-hub==0.9.0
|
||||
tensorflow-probability==0.12.1
|
||||
|
||||
# Optional: only used in chapter 13.
|
||||
# NOT AVAILABLE ON WINDOWS
|
||||
tfx==0.21.2
|
||||
tfx==0.27.0
|
||||
|
||||
# Optional: only used in chapter 16.
|
||||
# NOT AVAILABLE ON WINDOWS
|
||||
tensorflow-addons==0.8.3
|
||||
tensorflow-addons==0.12.1
|
||||
|
||||
##### Reinforcement Learning library (chapter 18)
|
||||
|
||||
# There are a few dependencies you need to install first, check out:
|
||||
# https://github.com/openai/gym#installing-everything
|
||||
gym[atari]==0.17.1
|
||||
gym[atari]==0.18.0
|
||||
# On Windows, install atari_py using:
|
||||
# pip install --no-index -f https://github.com/Kojoley/atari-py/releases atari_py
|
||||
|
||||
tf-agents==0.3.0
|
||||
|
||||
tf-agents==0.7.1
|
||||
|
||||
##### Image manipulation
|
||||
imageio==2.6.1
|
||||
Pillow==7.0.0
|
||||
scikit-image==0.16.2
|
||||
graphviz==0.13.2
|
||||
pydot==1.4.1
|
||||
opencv-python==4.2.0.32
|
||||
Pillow==7.2.0
|
||||
graphviz==0.16
|
||||
opencv-python==4.5.1.48
|
||||
pyglet==1.5.0
|
||||
|
||||
#pyvirtualdisplay # needed in chapter 16, if on a headless server
|
||||
|
@ -78,24 +71,23 @@ pyglet==1.5.0
|
|||
joblib==0.14.1
|
||||
|
||||
# Easy http requests
|
||||
requests==2.23.0
|
||||
requests==2.25.1
|
||||
|
||||
# Nice utility to diff Jupyter Notebooks.
|
||||
nbdime==2.0.0
|
||||
nbdime==2.1.0
|
||||
|
||||
# May be useful with Pandas for complex "where" clauses (e.g., Pandas
|
||||
# tutorial).
|
||||
numexpr==2.7.1
|
||||
numexpr==2.7.2
|
||||
|
||||
# Optional: these libraries can be useful in the classification chapter,
|
||||
# exercise 4.
|
||||
nltk==3.4.5
|
||||
urlextract==0.14.0
|
||||
nltk==3.5
|
||||
urlextract==1.2.0
|
||||
|
||||
# Optional: these libraries are only used in chapter 16
|
||||
spacy==2.2.4
|
||||
ftfy==5.7
|
||||
ftfy==5.8
|
||||
|
||||
# Optional: tqdm displays nice progress bars, ipywidgets for tqdm's notebook support
|
||||
tqdm==4.43.0
|
||||
ipywidgets==7.5.1
|
||||
tqdm==4.56.1
|
||||
ipywidgets==7.6.3
|
||||
|
|
|
@ -9,6 +9,17 @@
|
|||
"*This notebook demonstrates how to use the matplotlib library to plot beautiful graphs.*"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<table align=\"left\">\n",
|
||||
" <td>\n",
|
||||
" <a target=\"_blank\" href=\"https://colab.research.google.com/github/ageron/handson-ml2/blob/master/tools_matplotlib.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
|
||||
" </td>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
|
@ -695,7 +706,7 @@
|
|||
"ax = plt.subplot(133)\n",
|
||||
"plt.plot(x, x**3)\n",
|
||||
"plt.minorticks_on()\n",
|
||||
"ax.tick_params(axis='x', which='minor', bottom='off')\n",
|
||||
"ax.tick_params(axis='x', which='minor', bottom=False)\n",
|
||||
"ax.xaxis.set_ticks([-2, 0, 1, 2])\n",
|
||||
"ax.yaxis.set_ticks(np.arange(-5, 5, 1))\n",
|
||||
"ax.yaxis.set_ticklabels([\"min\", -4, -3, -2, -1, 0, 1, 2, 3, \"max\"])\n",
|
||||
|
@ -1242,7 +1253,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"toc": {
|
||||
"toc_cell": true,
|
||||
|
|
|
@ -6,9 +6,25 @@
|
|||
"source": [
|
||||
"**Tools - NumPy**\n",
|
||||
"\n",
|
||||
"*NumPy is the fundamental library for scientific computing with Python. NumPy is centered around a powerful N-dimensional array object, and it also contains useful linear algebra, Fourier transform, and random number functions.*\n",
|
||||
"\n",
|
||||
"# Creating arrays"
|
||||
"*NumPy is the fundamental library for scientific computing with Python. NumPy is centered around a powerful N-dimensional array object, and it also contains useful linear algebra, Fourier transform, and random number functions.*"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<table align=\"left\">\n",
|
||||
" <td>\n",
|
||||
" <a target=\"_blank\" href=\"https://colab.research.google.com/github/ageron/handson-ml2/blob/master/tools_numpy.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
|
||||
" </td>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Creating Arrays"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2833,7 +2849,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"toc": {
|
||||
"toc_cell": false,
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue