From 4fa5beb93a062fb47c6113c68f74620293967fe2 Mon Sep 17 00:00:00 2001
From: ziembla <woj@ziembla.com>
Date: Mon, 27 Nov 2017 17:16:51 +0100
Subject: [PATCH 1/4] Docker environment enhancements

- rearranged a Dockerfile to allow for incremental build
- switched running from root to "default new user"
- added the (easy to opt-out) configuration to use blank password
- added python-graphviz which enables DT visualization in notebooks
- added nbdime for "sensible notebook comparison"
- added custom command to "nbdiff" a notebook with its checkpointed version
- added simple README.md
---
 docker/Dockerfile             | 71 +++++++++++++++++++++++++++++------
 docker/{makefile => Makefile} |  6 ++-
 docker/README.md              | 37 ++++++++++++++++++
 docker/bashrc                 | 12 ++++++
 docker/docker-compose.yml     |  8 ++--
 5 files changed, 117 insertions(+), 17 deletions(-)
 rename docker/{makefile => Makefile} (55%)
 create mode 100644 docker/README.md
 create mode 100644 docker/bashrc

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 492810e..7b9f389 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,19 +1,66 @@
 FROM continuumio/anaconda3
 
-WORKDIR /usr/src/project
-COPY . /usr/src/project
-
 RUN apt-get update && apt-get upgrade -y \
-	
 	&& apt-get install -y \
-	 	libpq-dev \
-	 	build-essential \
-	 	git \
+		libpq-dev \
+		build-essential \
+		git \
+		sudo \
+	&& rm -rf /var/lib/apt/lists/*
 
-	&& rm -rf /var/lib/apt/lists/* \
+RUN conda install -y -c conda-forge \
+		tensorflow=1.0.0 \
+		jupyter_contrib_nbextensions
 
-	&& conda install -y -c conda-forge tensorflow=1.0.0 \
-	&& conda install -y -c conda-forge jupyter_contrib_nbextensions \
+ARG username
 
-	&& jupyter contrib nbextension install --user \
-	&& jupyter nbextension enable toc2/main 
+RUN adduser ${username} --gecos '' --disabled-password && \
+	echo "${username} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${username} && \
+	chmod 0440 /etc/sudoers.d/${username}
+
+ENV HOME /home/${username}
+
+WORKDIR ${HOME}/handson-ml
+RUN chown ${username}:${username} ${HOME}/handson-ml
+
+USER ${username}
+
+RUN jupyter contrib nbextension install --user
+RUN jupyter nbextension enable toc2/main
+
+
+# INFO: Have RUN command below uncommented to for easy and constant URL (just localhost:8888)
+#       (by setting empty password instead of using a token)
+#       To avoid making a security hole the best would be to regenerate a hash for
+#       your own non-empty password and to replace the hash below.
+#       You can compute a password hash in the notebook, just run the code:
+#          from notebook.auth import passwd
+#          passwd()
+RUN mkdir -p ${HOME}/.jupyter && \
+	echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \
+	>> ${HOME}/.jupyter/jupyter_notebook_config.py
+
+
+# INFO: Below - work in progress, nbdime not totally integrated, still:
+# 1. enables diffing notebooks via nbdiff after connecting to container by "make exec" (docker exec)
+#  Use:
+#      nbd NOTEBOOK_NAME.ipynb
+#    to get nbdiff between checkpointed version and current version of the given notebook
+# 2. allows decision tree visualization in notebook
+#  Use:
+#      from sklearn import tree
+#      from graphviz import Source
+#      Source(tree.export_graphviz(tree_clf, out_file=None, feature_names=iris.feature_names[2:]))
+
+USER root
+WORKDIR /
+
+RUN conda install -y -c conda-forge nbdime
+RUN conda install -y -c conda-forge python-graphviz
+
+USER ${username}
+WORKDIR ${HOME}/handson-ml
+
+COPY docker/bashrc /tmp/bashrc
+RUN cat /tmp/bashrc >> ${HOME}/.bashrc
+RUN sudo rm -rf /tmp/bashrc
diff --git a/docker/makefile b/docker/Makefile
similarity index 55%
rename from docker/makefile
rename to docker/Makefile
index d4eb6f0..6078fc9 100644
--- a/docker/makefile
+++ b/docker/Makefile
@@ -4,9 +4,11 @@ help:
 run:
 	docker-compose up
 exec:
-	docker-compose exec -ti hondson-ml /bin/bash
+	docker-compose exec handson-ml /bin/bash
 build: stop .FORCE
-	docker-compose build --force-rm 
+	docker-compose build
+rebuild: stop .FORCE
+	docker-compose build --force-rm
 stop:
 	docker stop handson-ml || true; docker rm handson-ml || true;
 .FORCE:
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..7f8904d
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,37 @@
+
+# Hands-on Machine Learning in Docker :-)
+
+This is the Docker configuration which allows you to run and tweak the book's notebooks without installing any dependencies on your machine!
+OK, any except `docker`. With `docker-compose`. Well, you may also want `make` (but it is only used as thin layer to call a few simple `docker-compose` commands).
+
+## Prerequisites
+
+As stated, the two things you need is `docker` and `docker-compose`.
+
+Follow the instructions on [Install Docker](https://docs.docker.com/engine/installation/) and [Install Docker Compose](https://docs.docker.com/compose/install/) for your environment if you haven't got `docker` already.
+
+Some general knowledge about `docker` infrastructure might be useful (that's an interesting topic on its own) but is not strictly *required* to just run the notebooks.
+
+## Usage
+
+### Prepare the image (once)
+
+Switch to `docker` directory here and run `make build` (or `docker-compose build`) to build your docker image. That may take some time but is only required once. Or perhaps a few times after you tweak something in a `Dockerfile`.
+
+After the process is finished you have a `handson-ml` image, that will be the base for your experiments. You can confirm that looking on results of `docker images` command.
+
+### Run the notebooks
+
+Run `make run` (or just `docker-compose up`) to start the jupyter server inside the container (also named `handson-ml`, same as image). Just point your browser to <http://localhost:8888> or the URL printed on the screen and you're ready to play with the book's code!
+
+The server runs in the directory containing the notebooks, and the changes you make from the browser will be persisted there.
+
+You can close the server just by pressing `Ctrl-C` in terminal window.
+
+### Run additional commands in container
+
+Run `make exec` (or `docker-compose exec handson-ml bash`) while the server is running to run an additional `bash` shell inside the `handson-ml` container. Now you're inside the environment prepared within the image.
+
+One of the usefull things that can be done there may be comparing versions of the notebooks using the `nbdiff` command if you haven't got `nbdime` installed locally (it is **way** better than plain `diff` for notebooks). See [Tools for diffing and merging of Jupyter notebooks]<https://github.com/jupyter/nbdime> for more details.
+
+You may also try `nbd NOTEBOOK_NAME.ipynb` command (custom, defined in the Dockerfile) to compare one of your notebooks with its `checkpointed` version. To be precise, the output will tell you "what modifications should be re-played on the *manually saved* version of the notebook (located in `.ipynb_checkpoints` subdirectory) to update it to the *current* i.e. *auto-saved* version (given as command's argument - located in working directory)".
diff --git a/docker/bashrc b/docker/bashrc
new file mode 100644
index 0000000..3535389
--- /dev/null
+++ b/docker/bashrc
@@ -0,0 +1,12 @@
+alias ll="ls -l"
+
+# nbd NOTEBOOK_NAME.ipynb: nbdiff from the checkpointed copy to the working copy
+nbd() {
+	[ -n "$1" ] || { echo "usage: nbd NOTEBOOK_NAME.ipynb" >&2; return 2; }
+	# Locals only: this file is appended to ~/.bashrc, so globals would
+	# leak into every interactive shell.
+	local dir base
+	dir=$(dirname -- "$1")
+	base=$(basename -- "$1" .ipynb)
+	nbdiff "$dir/.ipynb_checkpoints/$base-checkpoint.ipynb" "$dir/$base.ipynb"
+}
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index b44d1e1..f9dc4fa 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -1,9 +1,11 @@
 version: "3"
 services:
   handson-ml:
-    build: 
+    build:
       context: ../
       dockerfile: ./docker/Dockerfile
+      args:
+        - username=devel
     container_name: handson-ml
     image: handson-ml
     logging:
@@ -13,5 +15,5 @@ services:
     ports:
       - "8888:8888"
     volumes:
-      - ../:/usr/src/project
-    command: /opt/conda/bin/jupyter notebook --ip='*' --port=8888 --no-browser --allow-root
\ No newline at end of file
+      - ../:/home/devel/handson-ml
+    command: /opt/conda/bin/jupyter notebook --ip='*' --port=8888 --no-browser

From fc355ca6b98f0fae575b5806b6428338787eb32c Mon Sep 17 00:00:00 2001
From: ziembla <woj@ziembla.com>
Date: Tue, 28 Nov 2017 09:57:47 +0100
Subject: [PATCH 2/4] Rolled-back graphviz

I gave up on enabling DT visualization for the moment, as graphviz integration
stopped working... I can't run dot in the image any more, receiving:

Could not load "/opt/conda/lib/graphviz/libgvplugin_pango.so.6" - file not found

As far as I was able to diagnose (with ldd), that's because libiconv.so.2
is not available. But according to what I see on mailing lists, it should
not be needed (on Debian at least)...
---
 docker/Dockerfile | 14 ++-------
 docker/README.md  | 75 ++++++++++++++++++++++++-----------------------
 2 files changed, 41 insertions(+), 48 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 7b9f389..32c1e04 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -41,26 +41,18 @@ RUN mkdir -p ${HOME}/.jupyter && \
 	>> ${HOME}/.jupyter/jupyter_notebook_config.py
 
 
-# INFO: Below - work in progress, nbdime not totally integrated, still:
-# 1. enables diffing notebooks via nbdiff after connecting to container by "make exec" (docker exec)
+# INFO: Below - work in progress, nbdime not totally integrated, still it enables diffing
+#       notebooks via nbdiff after connecting to container by "make exec" (docker exec)
 #  Use:
 #      nbd NOTEBOOK_NAME.ipynb
 #    to get nbdiff between checkpointed version and current version of the given notebook
-# 2. allows decision tree visualization in notebook
-#  Use:
-#      from sklearn import tree
-#      from graphviz import Source
-#      Source(tree.export_graphviz(tree_clf, out_file=None, feature_names=iris.feature_names[2:]))
-
 USER root
 WORKDIR /
-
 RUN conda install -y -c conda-forge nbdime
-RUN conda install -y -c conda-forge python-graphviz
-
 USER ${username}
 WORKDIR ${HOME}/handson-ml
 
+
 COPY docker/bashrc /tmp/bashrc
 RUN cat /tmp/bashrc >> ${HOME}/.bashrc
 RUN sudo rm -rf /tmp/bashrc
diff --git a/docker/README.md b/docker/README.md
index 7f8904d..57580aa 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -1,37 +1,38 @@
-
-# Hands-on Machine Learning in Docker :-)
-
-This is the Docker configuration which allows you to run and tweak the book's notebooks without installing any dependencies on your machine!
-OK, any except `docker`. With `docker-compose`. Well, you may also want `make` (but it is only used as thin layer to call a few simple `docker-compose` commands).
-
-## Prerequisites
-
-As stated, the two things you need is `docker` and `docker-compose`.
-
-Follow the instructions on [Install Docker](https://docs.docker.com/engine/installation/) and [Install Docker Compose](https://docs.docker.com/compose/install/) for your environment if you haven't got `docker` already.
-
-Some general knowledge about `docker` infrastructure might be useful (that's an interesting topic on its own) but is not strictly *required* to just run the notebooks.
-
-## Usage
-
-### Prepare the image (once)
-
-Switch to `docker` directory here and run `make build` (or `docker-compose build`) to build your docker image. That may take some time but is only required once. Or perhaps a few times after you tweak something in a `Dockerfile`.
-
-After the process is finished you have a `handson-ml` image, that will be the base for your experiments. You can confirm that looking on results of `docker images` command.
-
-### Run the notebooks
-
-Run `make run` (or just `docker-compose up`) to start the jupyter server inside the container (also named `handson-ml`, same as image). Just point your browser to <http://localhost:8888> or the URL printed on the screen and you're ready to play with the book's code!
-
-The server runs in the directory containing the notebooks, and the changes you make from the browser will be persisted there.
-
-You can close the server just by pressing `Ctrl-C` in terminal window.
-
-### Run additional commands in container
-
-Run `make exec` (or `docker-compose exec handson-ml bash`) while the server is running to run an additional `bash` shell inside the `handson-ml` container. Now you're inside the environment prepared within the image.
-
-One of the usefull things that can be done there may be comparing versions of the notebooks using the `nbdiff` command if you haven't got `nbdime` installed locally (it is **way** better than plain `diff` for notebooks). See [Tools for diffing and merging of Jupyter notebooks]<https://github.com/jupyter/nbdime> for more details.
-
-You may also try `nbd NOTEBOOK_NAME.ipynb` command (custom, defined in the Dockerfile) to compare one of your notebooks with its `checkpointed` version. To be precise, the output will tell you "what modifications should be re-played on the *manually saved* version of the notebook (located in `.ipynb_checkpoints` subdirectory) to update it to the *current* i.e. *auto-saved* version (given as command's argument - located in working directory)".
+
+# Hands-on Machine Learning in Docker :-)
+
+This is the Docker configuration which allows you to run and tweak the book's notebooks without installing any dependencies on your machine!<br/>
+OK, any except `docker`. With `docker-compose`. Well, you may also want `make` (but it is only used as thin layer to call a few simple `docker-compose` commands).
+
+## Prerequisites
+
+As stated, the two things you need are `docker` and `docker-compose`.
+
+Follow the instructions on [Install Docker](https://docs.docker.com/engine/installation/) and [Install Docker Compose](https://docs.docker.com/compose/install/) for your environment if you haven't got `docker` already.
+
+Some general knowledge about `docker` infrastructure might be useful (that's an interesting topic on its own) but is not strictly *required* to just run the notebooks.
+
+## Usage
+
+### Prepare the image (once)
+
+Switch to `docker` directory here and run `make build` (or `docker-compose build`) to build your docker image. That may take some time but is only required once. Or perhaps a few times after you tweak something in a `Dockerfile`.
+
+After the process is finished you have a `handson-ml` image, which will be the base for your experiments. You can confirm that by looking at the output of the `docker images` command.
+
+### Run the notebooks
+
+Run `make run` (or just `docker-compose up`) to start the jupyter server inside the container (also named `handson-ml`, same as image). Just point your browser to <http://localhost:8888> (empty password) or the URL printed on the screen and you're ready to play with the book's code!
+
+The server runs in the directory containing the notebooks, and the changes you make from the browser will be persisted there.
+
+You can close the server just by pressing `Ctrl-C` in terminal window.
+
+### Run additional commands in container
+
+Run `make exec` (or `docker-compose exec handson-ml bash`) while the server is running to run an additional `bash` shell inside the `handson-ml` container. Now you're inside the environment prepared within the image.
+
+One of the usefull things that can be done there may be comparing versions of the notebooks using the `nbdiff` command if you haven't got `nbdime` installed locally (it is **way** better than plain `diff` for notebooks). See [Tools for diffing and merging of Jupyter notebooks](https://github.com/jupyter/nbdime) for more details.
+
+You may also try `nbd NOTEBOOK_NAME.ipynb` command (custom, see bashrc file) to compare one of your notebooks with its `checkpointed` version.<br/>
+To be precise, the output will tell you *what modifications should be re-played on the **manually saved** version of the notebook (located in `.ipynb_checkpoints` subdirectory) to update it to the **current** i.e. **auto-saved** version (given as command's argument - located in working directory)*.

From 93623e8cc267300b85e6f709b0d8b877541e67bb Mon Sep 17 00:00:00 2001
From: ziembla <woj@ziembla.com>
Date: Tue, 28 Nov 2017 10:33:20 +0100
Subject: [PATCH 3/4] Dockerfile argument to set container user UID

The UID of created user should be set to the UID of the user that will
be running the image/container to set appropriate permissions on files
within notebooks directory. Default 1000 should be ok for users using
the default account created while installing linux.
---
 docker/Dockerfile         | 3 ++-
 docker/docker-compose.yml | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 32c1e04..5578db8 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -13,8 +13,9 @@ RUN conda install -y -c conda-forge \
 		jupyter_contrib_nbextensions
 
 ARG username
+ARG userid
 
-RUN adduser ${username} --gecos '' --disabled-password && \
+RUN adduser ${username} --uid ${userid} --gecos '' --disabled-password && \
 	echo "${username} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${username} && \
 	chmod 0440 /etc/sudoers.d/${username}
 
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index f9dc4fa..8a9718c 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -6,6 +6,7 @@ services:
       dockerfile: ./docker/Dockerfile
       args:
         - username=devel
+        - userid=1000
     container_name: handson-ml
     image: handson-ml
     logging:

From 7093626bc2c827cd9ab4d6531c6bd6b939d182aa Mon Sep 17 00:00:00 2001
From: ziembla <woj@ziembla.com>
Date: Wed, 29 Nov 2017 16:20:07 +0100
Subject: [PATCH 4/4] Jupyter switched to token authentication by default,
 enabled nbdime integration in git diff

---
 docker/Dockerfile | 38 +++++++++++++++++++++++++-------------
 docker/README.md  |  4 +++-
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 5578db8..54e5510 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -15,9 +15,9 @@ RUN conda install -y -c conda-forge \
 ARG username
 ARG userid
 
-RUN adduser ${username} --uid ${userid} --gecos '' --disabled-password && \
-	echo "${username} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${username} && \
-	chmod 0440 /etc/sudoers.d/${username}
+RUN adduser ${username} --uid ${userid} --gecos '' --disabled-password \
+	&& echo "${username} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${username} \
+	&& chmod 0440 /etc/sudoers.d/${username}
 
 ENV HOME /home/${username}
 
@@ -30,29 +30,41 @@ RUN jupyter contrib nbextension install --user
 RUN jupyter nbextension enable toc2/main
 
 
-# INFO: Have RUN command below uncommented to for easy and constant URL (just localhost:8888)
-#       (by setting empty password instead of using a token)
-#       To avoid making a security hole the best would be to regenerate a hash for
-#       your own non-empty password and to replace the hash below.
-#       You can compute a password hash in the notebook, just run the code:
+# INFO: Uncomment the RUN command below for easy and constant notebook URL (just localhost:8888)
+#       That will switch jupyter to using empty password instead of a token.
+#       To avoid making a security hole you SHOULD in fact not only uncomment but
+#       regenerate the hash for your own non-empty password and replace the hash below.
+#       You can compute a password hash in any notebook, just run the code:
 #          from notebook.auth import passwd
 #          passwd()
-RUN mkdir -p ${HOME}/.jupyter && \
-	echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \
-	>> ${HOME}/.jupyter/jupyter_notebook_config.py
+#       and take the hash from the output
+#RUN mkdir -p ${HOME}/.jupyter && \
+#	echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \
+#	>> ${HOME}/.jupyter/jupyter_notebook_config.py
+
+# INFO: Uncomment the RUN command below to disable git diff paging
+#RUN git config --global core.pager ''
 
 
 # INFO: Below - work in progress, nbdime not totally integrated, still it enables diffing
-#       notebooks via nbdiff after connecting to container by "make exec" (docker exec)
-#  Use:
+#       notebooks with nbdiff (and nbdiff support in git diff command) after connecting to
+#       the container by "make exec" (docker exec)
+#  Try:
 #      nbd NOTEBOOK_NAME.ipynb
 #    to get nbdiff between checkpointed version and current version of the given notebook
 USER root
 WORKDIR /
+
 RUN conda install -y -c conda-forge nbdime
+
 USER ${username}
 WORKDIR ${HOME}/handson-ml
 
+RUN git-nbdiffdriver config --enable --global
+
+# INFO: Uncomment the RUN command below to ignore metadata in nbdiff within git diff
+#RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-metadata'
+
 
 COPY docker/bashrc /tmp/bashrc
 RUN cat /tmp/bashrc >> ${HOME}/.bashrc
diff --git a/docker/README.md b/docker/README.md
index 57580aa..50b6f12 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -22,7 +22,7 @@ After the process is finished you have a `handson-ml` image, that will be the ba
 
 ### Run the notebooks
 
-Run `make run` (or just `docker-compose up`) to start the jupyter server inside the container (also named `handson-ml`, same as image). Just point your browser to <http://localhost:8888> (empty password) or the URL printed on the screen and you're ready to play with the book's code!
+Run `make run` (or just `docker-compose up`) to start the jupyter server inside the container (also named `handson-ml`, same as image). Just point your browser to the URL printed on the screen (or just <http://localhost:8888> if you enabled password authentication) and you're ready to play with the book's code!
 
 The server runs in the directory containing the notebooks, and the changes you make from the browser will be persisted there.
 
@@ -34,5 +34,7 @@ Run `make exec` (or `docker-compose exec handson-ml bash`) while the server is r
 
 One of the usefull things that can be done there may be comparing versions of the notebooks using the `nbdiff` command if you haven't got `nbdime` installed locally (it is **way** better than plain `diff` for notebooks). See [Tools for diffing and merging of Jupyter notebooks](https://github.com/jupyter/nbdime) for more details.
 
+You can see changes you made relative to the version in git using `git diff` which is integrated with `nbdiff`.
+
 You may also try `nbd NOTEBOOK_NAME.ipynb` command (custom, see bashrc file) to compare one of your notebooks with its `checkpointed` version.<br/>
 To be precise, the output will tell you *what modifications should be re-played on the **manually saved** version of the notebook (located in `.ipynb_checkpoints` subdirectory) to update it to the **current** i.e. **auto-saved** version (given as command's argument - located in working directory)*.