From 9328b858c55c98562256bfa70b93e57ea7fc5172 Mon Sep 17 00:00:00 2001 From: rickiepark Date: Thu, 21 Dec 2017 16:03:37 +0900 Subject: [PATCH 1/4] add gitignore, environment.yml --- .gitignore | 2 ++ environment.yml | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 environment.yml diff --git a/.gitignore b/.gitignore index c77a27e..b8f995c 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ my_* datasets/words datasets/flowers datasets/spam +*.gz +datasets/mnist/train-labels-idx1-ubyte diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..d930bee --- /dev/null +++ b/environment.yml @@ -0,0 +1,16 @@ +name: handson-ml +dependencies: +- python=3.5 +- jupyter +- matplotlib +- numexpr +- numpy +- pandas +- Pillow +- psutil +- scikit-learn +- scipy +- sympy +- pip: + - tensorflow + - watermark From 385d635e929fafbb396bb99171b0292a8bcda078 Mon Sep 17 00:00:00 2001 From: rickiepark Date: Tue, 30 Jan 2018 17:19:37 +0900 Subject: [PATCH 2/4] add params for avoiding warn and improving perf. --- 02_end_to_end_machine_learning_project.ipynb | 2 +- 03_classification.ipynb | 6 +++--- 04_training_linear_models.ipynb | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index 1e51f9a..14a7c20 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -1503,7 +1503,7 @@ "forest_reg = RandomForestRegressor(random_state=42)\n", "# train across 5 folds, that's a total of (12+6)*5=90 rounds of training \n", "grid_search = GridSearchCV(forest_reg, param_grid, cv=5,\n", - " scoring='neg_mean_squared_error')\n", + " scoring='neg_mean_squared_error', return_train_score=True)\n", "grid_search.fit(housing_prepared, housing_labels)" ] }, diff --git a/03_classification.ipynb b/03_classification.ipynb index 0f8b455..7c66716 100644 --- a/03_classification.ipynb +++ b/03_classification.ipynb @@ -241,7 +241,7 @@ "source": [ "from sklearn.linear_model import SGDClassifier\n", "\n", - "sgd_clf = SGDClassifier(random_state=42)\n", + "sgd_clf = SGDClassifier(max_iter=5, random_state=42)\n", "sgd_clf.fit(X_train, y_train_5)" ] }, @@ -766,7 +766,7 @@ "outputs": [], "source": [ "from sklearn.multiclass import OneVsOneClassifier\n", - "ovo_clf = OneVsOneClassifier(SGDClassifier(random_state=42))\n", + "ovo_clf = OneVsOneClassifier(SGDClassifier(max_iter=5, random_state=42))\n", "ovo_clf.fit(X_train, y_train)\n", "ovo_clf.predict([some_digit])" ] @@ -1185,7 +1185,7 @@ "param_grid = [{'weights': [\"uniform\", \"distance\"], 'n_neighbors': [3, 4, 5]}]\n", "\n", "knn_clf = KNeighborsClassifier()\n", - "grid_search = GridSearchCV(knn_clf, param_grid, cv=5, verbose=3)\n", + "grid_search = GridSearchCV(knn_clf, param_grid, cv=5, verbose=3, n_jobs=-1)\n", "grid_search.fit(X_train, y_train)" ] }, diff --git a/04_training_linear_models.ipynb b/04_training_linear_models.ipynb index a32fdea..4cf264e 100644 --- a/04_training_linear_models.ipynb +++ b/04_training_linear_models.ipynb @@ -452,7 +452,7 @@ "outputs": [], "source": [ "from sklearn.linear_model import SGDRegressor\n", - "sgd_reg = SGDRegressor(n_iter=50, penalty=None, eta0=0.1, random_state=42)\n", + "sgd_reg = SGDRegressor(max_iter=50, penalty=None, eta0=0.1, random_state=42)\n", "sgd_reg.fit(X, y.ravel())" ] }, @@ -880,7 +880,7 @@ }, "outputs": [], "source": [ - "sgd_reg = SGDRegressor(penalty=\"l2\", random_state=42)\n", + "sgd_reg = SGDRegressor(max_iter=5, penalty=\"l2\", random_state=42)\n", "sgd_reg.fit(X, y.ravel())\n", "sgd_reg.predict([[1.5]])" ] @@ -981,7 +981,7 @@ "X_train_poly_scaled = poly_scaler.fit_transform(X_train)\n", "X_val_poly_scaled = poly_scaler.transform(X_val)\n", "\n", - "sgd_reg = SGDRegressor(n_iter=1,\n", + "sgd_reg = SGDRegressor(max_iter=1,\n", " penalty=None,\n", " eta0=0.0005,\n", " warm_start=True,\n", @@ -1030,7 +1030,7 @@ "outputs": [], "source": [ "from sklearn.base import clone\n", - "sgd_reg = SGDRegressor(n_iter=1, warm_start=True, penalty=None,\n", + "sgd_reg = SGDRegressor(max_iter=1, warm_start=True, penalty=None,\n", " learning_rate=\"constant\", eta0=0.0005, random_state=42)\n", "\n", "minimum_val_error = float(\"inf\")\n", From 483bc589cef24b7e4bfce066f85bbb618329ed2c Mon Sep 17 00:00:00 2001 From: rickiepark Date: Tue, 30 Jan 2018 17:26:07 +0900 Subject: [PATCH 3/4] sync with upstream --- .gitignore | 2 -- environment.yml | 16 ---------------- 2 files changed, 18 deletions(-) delete mode 100644 environment.yml diff --git a/.gitignore b/.gitignore index b8f995c..c77a27e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,3 @@ my_* datasets/words datasets/flowers datasets/spam -*.gz -datasets/mnist/train-labels-idx1-ubyte diff --git a/environment.yml b/environment.yml deleted file mode 100644 index d930bee..0000000 --- a/environment.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: handson-ml -dependencies: -- python=3.5 -- jupyter -- matplotlib -- numexpr -- numpy -- pandas -- Pillow -- psutil -- scikit-learn -- scipy -- sympy -- pip: - - tensorflow - - watermark From 31d2f0d6955f98d5a3f2751a1fefa118318c64c3 Mon Sep 17 00:00:00 2001 From: rickiepark Date: Tue, 30 Jan 2018 17:49:44 +0900 Subject: [PATCH 4/4] add n_jobs param --- 03_classification.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/03_classification.ipynb b/03_classification.ipynb index 7c66716..284d0a6 100644 --- a/03_classification.ipynb +++ b/03_classification.ipynb @@ -948,7 +948,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_train_knn_pred = cross_val_predict(knn_clf, X_train, y_multilabel, cv=3)\n", + "y_train_knn_pred = cross_val_predict(knn_clf, X_train, y_multilabel, cv=3, n_jobs=-1)\n", "f1_score(y_multilabel, y_train_knn_pred, average=\"macro\")" ] },