From e7a1273c486d7629b889482e41743d956d2fac11 Mon Sep 17 00:00:00 2001 From: ziembla Date: Thu, 30 Nov 2017 06:09:45 +0100 Subject: [PATCH 01/12] Docker environment minutiae Docker compose project name set to avoid collisions, smiley dropped from README heading --- docker/.env | 1 + docker/README.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 docker/.env diff --git a/docker/.env b/docker/.env new file mode 100644 index 0000000..16adf41 --- /dev/null +++ b/docker/.env @@ -0,0 +1 @@ +COMPOSE_PROJECT_NAME=handson-ml diff --git a/docker/README.md b/docker/README.md index 50b6f12..2355c45 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,5 +1,5 @@ -# Hands-on Machine Learning in Docker :-) +# Hands-on Machine Learning in Docker This is the Docker configuration which allows you to run and tweak the book's notebooks without installing any dependencies on your machine!
OK, any except `docker`. With `docker-compose`. Well, you may also want `make` (but it is only used as thin layer to call a few simple `docker-compose` commands). From 8d16b3061d5ba3b5282190c13547f33819099ede Mon Sep 17 00:00:00 2001 From: ziembla Date: Thu, 30 Nov 2017 12:09:16 +0100 Subject: [PATCH 02/12] Patches to nbdiff for skipping noisy metadata, some local config Nbdiff --ignore-details skils autoscroll, collapsed, deletable, editable, toc (pull request on the way). Enabling empty pass, no git pager, ignoring gitdiff nbdiff details. --- docker/Dockerfile | 43 ++++++++++++++++++++++++----------- docker/bashrc | 4 ++-- docker/nbdime-1-details.patch | 17 ++++++++++++++ docker/nbdime-2-toc.patch | 11 +++++++++ 4 files changed, 60 insertions(+), 15 deletions(-) create mode 100644 docker/nbdime-1-details.patch create mode 100644 docker/nbdime-2-toc.patch diff --git a/docker/Dockerfile b/docker/Dockerfile index 54e5510..6b2852e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -21,8 +21,10 @@ RUN adduser ${username} --uid ${userid} --gecos '' --disabled-password \ ENV HOME /home/${username} -WORKDIR ${HOME}/handson-ml -RUN chown ${username}:${username} ${HOME}/handson-ml +ARG workdir=${HOME}/handson-ml + +WORKDIR ${workdir} +RUN chown ${username}:${username} ${workdir} USER ${username} @@ -30,7 +32,7 @@ RUN jupyter contrib nbextension install --user RUN jupyter nbextension enable toc2/main -# INFO: Uncomment the RUN command below for easy and constant notebook URL (just localhost:8888) +## INFO: Uncomment the RUN command below for easy and constant notebook URL (just localhost:8888) # That will switch jupyter to using empty password instead of a token. # To avoid making a security hole you SHOULD in fact not only uncomment but # regenerate the hash for your own non-empty password and replace the hash below. 
@@ -38,12 +40,12 @@ RUN jupyter nbextension enable toc2/main # from notebook.auth import passwd # passwd() # and take the hash from the output -#RUN mkdir -p ${HOME}/.jupyter && \ -# echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \ -# >> ${HOME}/.jupyter/jupyter_notebook_config.py +RUN mkdir -p ${HOME}/.jupyter && \ + echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \ + >> ${HOME}/.jupyter/jupyter_notebook_config.py -# INFO: Uncomment the RUN command below to disable git diff paging -#RUN git config --global core.pager '' +## INFO: Uncomment the RUN command below to disable git diff paging +RUN git config --global core.pager '' # INFO: Below - work in progress, nbdime not totally integrated, still it enables diffing @@ -54,18 +56,33 @@ RUN jupyter nbextension enable toc2/main # to get nbdiff between checkpointed version and current version of the given notebook USER root WORKDIR / - RUN conda install -y -c conda-forge nbdime - USER ${username} -WORKDIR ${HOME}/handson-ml +WORKDIR ${workdir} RUN git-nbdiffdriver config --enable --global -# INFO: Uncomment the RUN command below to ignore metadata in nbdiff within git diff +## INFO: Optionally uncomment any (one) of the following RUN commands below to ignore either +# metadata or details in nbdiff within git diff #RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-metadata' +RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-details' + + +## +RUN ls -l /tmp/ +COPY docker/nbdime-*.patch /tmp/ +RUN ls -l /tmp/ +USER root +WORKDIR / +RUN patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \ + /tmp/nbdime-1-details.patch \ + && patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \ + /tmp/nbdime-2-toc.patch +RUN rm /tmp/nbdime-*.patch +USER ${username} +WORKDIR ${workdir} COPY docker/bashrc /tmp/bashrc RUN cat 
/tmp/bashrc >> ${HOME}/.bashrc -RUN sudo rm -rf /tmp/bashrc +RUN sudo rm /tmp/bashrc diff --git a/docker/bashrc b/docker/bashrc index 3535389..b1bce45 100644 --- a/docker/bashrc +++ b/docker/bashrc @@ -1,4 +1,4 @@ -alias ll="ls -l" +alias ll="ls -alF" nbd() { DIRNAME=$(dirname "$1") @@ -8,5 +8,5 @@ nbd() { CHECKPOINT_COPY=$DIRNAME/.ipynb_checkpoints/$BASENAME-checkpoint.ipynb # echo "How change $CHECKPOINT_COPY into $WORKING_COPY" - nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" + nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details } diff --git a/docker/nbdime-1-details.patch b/docker/nbdime-1-details.patch new file mode 100644 index 0000000..98f76d6 --- /dev/null +++ b/docker/nbdime-1-details.patch @@ -0,0 +1,17 @@ +--- a/nbdime/diffing/notebooks.py ++++ b/nbdime/diffing/notebooks.py +@@ -548,8 +548,12 @@ def set_notebook_diff_targets(sources=True, outputs=True, attachments=True, meta + metadata_keys = ("/cells/*/metadata", "/metadata", "/cells/*/outputs/*/metadata") + if metadata: + for key in metadata_keys: +- if key in notebook_differs: +- del notebook_differs[key] ++ if details: ++ if key in notebook_differs: ++ del notebook_differs[key] ++ else: ++ notebook_differs[key] = diff_ignore_keys( ++ inner_differ=diff, ignore_keys=['collapsed', 'autoscroll', 'deletable', 'editable']) + else: + for key in metadata_keys: + notebook_differs[key] = diff_ignore diff --git a/docker/nbdime-2-toc.patch b/docker/nbdime-2-toc.patch new file mode 100644 index 0000000..4924e66 --- /dev/null +++ b/docker/nbdime-2-toc.patch @@ -0,0 +1,11 @@ +--- a/nbdime/diffing/notebooks.py ++++ b/nbdime/diffing/notebooks.py +@@ -553,7 +553,7 @@ + del notebook_differs[key] + else: + notebook_differs[key] = diff_ignore_keys( +- inner_differ=diff, ignore_keys=['collapsed', 'autoscroll', 'deletable', 'editable']) ++ inner_differ=diff, ignore_keys=['toc', 'collapsed', 'autoscroll', 'deletable', 'editable']) + else: + for key in metadata_keys: + notebook_differs[key] = diff_ignore From 
8586120c3d21f4b0b6c11db18fe86b7b3f22f8c1 Mon Sep 17 00:00:00 2001 From: ziembla Date: Thu, 30 Nov 2017 12:59:26 +0100 Subject: [PATCH 03/12] Git filter testing demo --- docker/Dockerfile | 22 ++++++++++++++------- docker/ipynb_cleaner.py | 42 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 7 deletions(-) create mode 100755 docker/ipynb_cleaner.py diff --git a/docker/Dockerfile b/docker/Dockerfile index 6b2852e..5daacee 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -40,9 +40,9 @@ RUN jupyter nbextension enable toc2/main # from notebook.auth import passwd # passwd() # and take the hash from the output -RUN mkdir -p ${HOME}/.jupyter && \ - echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \ - >> ${HOME}/.jupyter/jupyter_notebook_config.py +#RUN mkdir -p ${HOME}/.jupyter && \ +# echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \ +# >> ${HOME}/.jupyter/jupyter_notebook_config.py ## INFO: Uncomment the RUN command below to disable git diff paging RUN git config --global core.pager '' @@ -65,13 +65,11 @@ RUN git-nbdiffdriver config --enable --global ## INFO: Optionally uncomment any (one) of the following RUN commands below to ignore either # metadata or details in nbdiff within git diff #RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-metadata' -RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-details' +#RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-details' -## -RUN ls -l /tmp/ +# INFO: Dirty nbdime patching COPY docker/nbdime-*.patch /tmp/ -RUN ls -l /tmp/ USER root WORKDIR / RUN patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \ @@ -86,3 +84,13 @@ WORKDIR ${workdir} COPY docker/bashrc /tmp/bashrc RUN cat /tmp/bashrc >> ${HOME}/.bashrc RUN sudo rm /tmp/bashrc + + +# INFO: Git filter testing +COPY 
docker/ipynb_cleaner.py /usr/bin/ipynb_cleaner +RUN mkdir -p ~/.config/git \ + && echo '*.ipynb filter=clean_ipynb' >> ~/.config/git/attributes \ + && git config --global filter.clean_ipynb.clean ipynb_cleaner \ + && git config --global filter.clean_ipynb.smudge cat + +# && git config --global filter.clean_ipynb.clean 'ipynb_cleaner %f' diff --git a/docker/ipynb_cleaner.py b/docker/ipynb_cleaner.py new file mode 100755 index 0000000..d34d7a6 --- /dev/null +++ b/docker/ipynb_cleaner.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +""" + +******************************** +DANGER - W.I.P. - TESTING ONLY!! +******************************** + +Clean jupyter notebook for git operations +Based on "Keeping IPython notebooks under Git version control" +(see: + https://gist.github.com/pbugnion/ea2797393033b54674af + http://pascalbugnion.net/blog/ipython-notebooks-and-git.html + http://stackoverflow.com/a/20844506/827862 +) +""" + +import sys +import json + +sys.stderr.write("\n\nCAUTION ! W.I.P ! Only dropping some test metadata, don't commit!\n\n") + +def log(x): + sys.stderr.write("\n\n[{}]\n\n\n".format(x)) +def logj(x): + sys.stderr.write("\n\n") + json.dump(x, sys.stderr, sort_keys=True, indent=1, separators=(",",": ")) + sys.stderr.write("\n\n") + +log(sys.argv) +#sys.exit(17) + +nb = sys.stdin.read() +json_in = json.loads(nb) + +logj(json_in["metadata"]) +del json_in["metadata"]["nav_menu"] +del json_in["metadata"]["toc"] +json_in["metadata"]["language_info"]["version"]="17.0" +logj(json_in["metadata"]) + +json.dump(json_in, sys.stdout, sort_keys=True, indent=1, separators=(",",": ")) From ef9df82689a0a530e50b0433033594f41cdb4af7 Mon Sep 17 00:00:00 2001 From: ziembla Date: Fri, 1 Dec 2017 10:56:36 +0100 Subject: [PATCH 04/12] Dockerfile publishable cleanup, git diff filter testing removed --- docker/Dockerfile | 54 +++++++++++++++++------------------------ docker/ipynb_cleaner.py | 42 -------------------------------- 2 files changed, 22 insertions(+), 74 deletions(-) 
delete mode 100755 docker/ipynb_cleaner.py diff --git a/docker/Dockerfile b/docker/Dockerfile index 5daacee..e7efc36 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -32,28 +32,13 @@ RUN jupyter contrib nbextension install --user RUN jupyter nbextension enable toc2/main -## INFO: Uncomment the RUN command below for easy and constant notebook URL (just localhost:8888) -# That will switch jupyter to using empty password instead of a token. -# To avoid making a security hole you SHOULD in fact not only uncomment but -# regenerate the hash for your own non-empty password and replace the hash below. -# You can compute a password hash in any notebook, just run the code: -# from notebook.auth import passwd -# passwd() -# and take the hash from the output -#RUN mkdir -p ${HOME}/.jupyter && \ -# echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \ -# >> ${HOME}/.jupyter/jupyter_notebook_config.py - -## INFO: Uncomment the RUN command below to disable git diff paging -RUN git config --global core.pager '' - - -# INFO: Below - work in progress, nbdime not totally integrated, still it enables diffing -# notebooks with nbdiff (and nbdiff support in git diff command) after connecting to -# the container by "make exec" (docker exec) -# Try: -# nbd NOTEBOOK_NAME.ipynb -# to get nbdiff between checkpointed version and current version of the given notebook +# INFO: Jupyter and nbdime extension are not totally integrated (anaconda image is py36, +# nbdime checks for py35 at the moment, still the config below enables diffing +# notebooks with nbdiff (and nbdiff support in git diff command) after connecting +# to the container by "make exec" (or "docker-compose exec handson-ml bash") +# You may also try running: +# nbd NOTEBOOK_NAME.ipynb +# to get nbdiff between checkpointed version and current version of the given notebook USER root WORKDIR / RUN conda install -y -c conda-forge nbdime @@ -62,10 +47,10 @@ WORKDIR ${workdir} RUN 
git-nbdiffdriver config --enable --global -## INFO: Optionally uncomment any (one) of the following RUN commands below to ignore either +# INFO: Optionally uncomment any (one) of the following RUN commands below to ignore either # metadata or details in nbdiff within git diff #RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-metadata' -#RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-details' +RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-details' # INFO: Dirty nbdime patching @@ -85,12 +70,17 @@ COPY docker/bashrc /tmp/bashrc RUN cat /tmp/bashrc >> ${HOME}/.bashrc RUN sudo rm /tmp/bashrc +# INFO: Uncomment the RUN command below to disable git diff paging +#RUN git config --global core.pager '' -# INFO: Git filter testing -COPY docker/ipynb_cleaner.py /usr/bin/ipynb_cleaner -RUN mkdir -p ~/.config/git \ - && echo '*.ipynb filter=clean_ipynb' >> ~/.config/git/attributes \ - && git config --global filter.clean_ipynb.clean ipynb_cleaner \ - && git config --global filter.clean_ipynb.smudge cat - -# && git config --global filter.clean_ipynb.clean 'ipynb_cleaner %f' +# INFO: Uncomment the RUN command below for easy and constant notebook URL (just localhost:8888) +# That will switch jupyter to using empty password instead of a token. +# To avoid making a security hole you SHOULD in fact not only uncomment but +# regenerate the hash for your own non-empty password and replace the hash below. 
+# You can compute a password hash in any notebook, just run the code: +# from notebook.auth import passwd +# passwd() +# and take the hash from the output +#RUN mkdir -p ${HOME}/.jupyter && \ +# echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \ +# >> ${HOME}/.jupyter/jupyter_notebook_config.py diff --git a/docker/ipynb_cleaner.py b/docker/ipynb_cleaner.py deleted file mode 100755 index d34d7a6..0000000 --- a/docker/ipynb_cleaner.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python - -""" - -******************************** -DANGER - W.I.P. - TESTING ONLY!! -******************************** - -Clean jupyter notebook for git operations -Based on "Keeping IPython notebooks under Git version control" -(see: - https://gist.github.com/pbugnion/ea2797393033b54674af - http://pascalbugnion.net/blog/ipython-notebooks-and-git.html - http://stackoverflow.com/a/20844506/827862 -) -""" - -import sys -import json - -sys.stderr.write("\n\nCAUTION ! W.I.P ! 
Only dropping some test metadata, don't commit!\n\n") - -def log(x): - sys.stderr.write("\n\n[{}]\n\n\n".format(x)) -def logj(x): - sys.stderr.write("\n\n") - json.dump(x, sys.stderr, sort_keys=True, indent=1, separators=(",",": ")) - sys.stderr.write("\n\n") - -log(sys.argv) -#sys.exit(17) - -nb = sys.stdin.read() -json_in = json.loads(nb) - -logj(json_in["metadata"]) -del json_in["metadata"]["nav_menu"] -del json_in["metadata"]["toc"] -json_in["metadata"]["language_info"]["version"]="17.0" -logj(json_in["metadata"]) - -json.dump(json_in, sys.stdout, sort_keys=True, indent=1, separators=(",",": ")) From 107de893049dea3afa26e432beb4158ceddf64ed Mon Sep 17 00:00:00 2001 From: ziembla Date: Fri, 1 Dec 2017 11:28:18 +0100 Subject: [PATCH 05/12] Nbdime patching ignored if the original file was changed --- docker/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index e7efc36..a8fafa0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -53,14 +53,14 @@ RUN git-nbdiffdriver config --enable --global RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-details' -# INFO: Dirty nbdime patching +# INFO: Dirty nbdime patching (ignored if not matching) COPY docker/nbdime-*.patch /tmp/ USER root WORKDIR / RUN patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \ - /tmp/nbdime-1-details.patch \ + /tmp/nbdime-2-toc.patch || true \ && patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \ - /tmp/nbdime-2-toc.patch + /tmp/nbdime-2-toc.patch || true RUN rm /tmp/nbdime-*.patch USER ${username} WORKDIR ${workdir} From ddb9784176586d618a9e6b4cc39f5f10ae6d19a1 Mon Sep 17 00:00:00 2001 From: ziembla Date: Mon, 4 Dec 2017 11:33:16 +0100 Subject: [PATCH 06/12] tensorflow version unpined, tensorboard support, home variable fix --- docker/Dockerfile | 17 ++++++++--------- docker/README.md | 4 +++- docker/bashrc | 6 ++++++ 
docker/docker-compose.yml | 1 + 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index a8fafa0..bfccb99 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -9,20 +9,19 @@ RUN apt-get update && apt-get upgrade -y \ && rm -rf /var/lib/apt/lists/* RUN conda install -y -c conda-forge \ - tensorflow=1.0.0 \ + tensorflow \ jupyter_contrib_nbextensions ARG username ARG userid +ARG home=/home/${username} +ARG workdir=${home}/handson-ml + RUN adduser ${username} --uid ${userid} --gecos '' --disabled-password \ && echo "${username} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${username} \ && chmod 0440 /etc/sudoers.d/${username} -ENV HOME /home/${username} - -ARG workdir=${HOME}/handson-ml - WORKDIR ${workdir} RUN chown ${username}:${username} ${workdir} @@ -58,7 +57,7 @@ COPY docker/nbdime-*.patch /tmp/ USER root WORKDIR / RUN patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \ - /tmp/nbdime-2-toc.patch || true \ + /tmp/nbdime-1-details.patch || true \ && patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \ /tmp/nbdime-2-toc.patch || true RUN rm /tmp/nbdime-*.patch @@ -67,7 +66,7 @@ WORKDIR ${workdir} COPY docker/bashrc /tmp/bashrc -RUN cat /tmp/bashrc >> ${HOME}/.bashrc +RUN cat /tmp/bashrc >> ${home}/.bashrc RUN sudo rm /tmp/bashrc # INFO: Uncomment the RUN command below to disable git diff paging @@ -81,6 +80,6 @@ RUN sudo rm /tmp/bashrc # from notebook.auth import passwd # passwd() # and take the hash from the output -#RUN mkdir -p ${HOME}/.jupyter && \ +#RUN mkdir -p ${home}/.jupyter && \ # echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \ -# >> ${HOME}/.jupyter/jupyter_notebook_config.py +# >> ${home}/.jupyter/jupyter_notebook_config.py diff --git a/docker/README.md b/docker/README.md index 2355c45..037ae22 100644 --- a/docker/README.md +++ b/docker/README.md @@ -32,7 +32,9 @@ You can close the server just by 
pressing `Ctrl-C` in terminal window. Run `make exec` (or `docker-compose exec handson-ml bash`) while the server is running to run an additional `bash` shell inside the `handson-ml` container. Now you're inside the environment prepared within the image. -One of the usefull things that can be done there may be comparing versions of the notebooks using the `nbdiff` command if you haven't got `nbdime` installed locally (it is **way** better than plain `diff` for notebooks). See [Tools for diffing and merging of Jupyter notebooks](https://github.com/jupyter/nbdime) for more details. +One of the useful things that can be done there would be starting TensorBoard (for example with the simple `tb` command, see the bashrc file). + +Another one may be comparing versions of the notebooks using the `nbdiff` command if you haven't got `nbdime` installed locally (it is **way** better than plain `diff` for notebooks). See [Tools for diffing and merging of Jupyter notebooks](https://github.com/jupyter/nbdime) for more details. You can see changes you made relative to the version in git using `git diff` which is integrated with `nbdiff`. 
diff --git a/docker/bashrc b/docker/bashrc index b1bce45..619677d 100644 --- a/docker/bashrc +++ b/docker/bashrc @@ -10,3 +10,9 @@ nbd() { # echo "How change $CHECKPOINT_COPY into $WORKING_COPY" nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details } + +tb() { + python -m tensorboard.main --logdir=tf_logs +} + +alias tensorboard="python -m tensorboard.main" diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 8a9718c..d4b46e4 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -15,6 +15,7 @@ services: max-size: 50m ports: - "8888:8888" + - "6006:6006" volumes: - ../:/home/devel/handson-ml command: /opt/conda/bin/jupyter notebook --ip='*' --port=8888 --no-browser From 6e4004f16c8ffe31d61c7fc127feb0d8f947cc4b Mon Sep 17 00:00:00 2001 From: ziembla Date: Sat, 9 Dec 2017 20:17:56 +0100 Subject: [PATCH 07/12] scripts for jupyter notebooks cleanup, bin subdir on path --- docker/Dockerfile | 1 + docker/bashrc | 19 +----- docker/bin/nbclean_checkpoints | 116 +++++++++++++++++++++++++++++++++ docker/bin/nbdiff_checkpoint | 9 +++ docker/bin/rm_empty_subdirs | 54 +++++++++++++++ docker/bin/tensorboard | 2 + 6 files changed, 184 insertions(+), 17 deletions(-) create mode 100755 docker/bin/nbclean_checkpoints create mode 100755 docker/bin/nbdiff_checkpoint create mode 100755 docker/bin/rm_empty_subdirs create mode 100755 docker/bin/tensorboard diff --git a/docker/Dockerfile b/docker/Dockerfile index bfccb99..adf97f1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -67,6 +67,7 @@ WORKDIR ${workdir} COPY docker/bashrc /tmp/bashrc RUN cat /tmp/bashrc >> ${home}/.bashrc +RUN echo "export PATH=\"${workdir}/docker/bin:$PATH\"" >> ${home}/.bashrc RUN sudo rm /tmp/bashrc # INFO: Uncomment the RUN command below to disable git diff paging diff --git a/docker/bashrc b/docker/bashrc index 619677d..ff19745 100644 --- a/docker/bashrc +++ b/docker/bashrc @@ -1,18 +1,3 @@ alias ll="ls -alF" - -nbd() { - DIRNAME=$(dirname "$1") - 
BASENAME=$(basename "$1" .ipynb) - - WORKING_COPY=$DIRNAME/$BASENAME.ipynb - CHECKPOINT_COPY=$DIRNAME/.ipynb_checkpoints/$BASENAME-checkpoint.ipynb - - # echo "How change $CHECKPOINT_COPY into $WORKING_COPY" - nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details -} - -tb() { - python -m tensorboard.main --logdir=tf_logs -} - -alias tensorboard="python -m tensorboard.main" +alias nbd="nbdiff_checkpoint" +alias tb="tensorboard --logdir=tf_logs" diff --git a/docker/bin/nbclean_checkpoints b/docker/bin/nbclean_checkpoints new file mode 100755 index 0000000..ba4aaf9 --- /dev/null +++ b/docker/bin/nbclean_checkpoints @@ -0,0 +1,116 @@ +#!/usr/bin/env python + +import collections +import glob +import hashlib +import os +import subprocess + + +class NotebookAnalyser: + + def __init__(self, dry_run=False, verbose=False, colorful=False): + self._dry_run = dry_run + self._verbose = verbose + self._colors = collections.defaultdict(lambda: "") + if colorful: + for color in [ + NotebookAnalyser.COLOR_WHITE, + NotebookAnalyser.COLOR_RED, + NotebookAnalyser.COLOR_GREEN, + NotebookAnalyser.COLOR_YELLOW, + ]: + self._colors[color] = "\033[{}m".format(color) + + NOTEBOOK_SUFFIX = ".ipynb" + CHECKPOINT_DIR = NOTEBOOK_SUFFIX + "_checkpoints" + CHECKPOINT_MASK = "*-checkpoint" + NOTEBOOK_SUFFIX + CHECKPOINT_MASK_LEN = len(CHECKPOINT_MASK) - 1 + + @staticmethod + def get_hash(file_path): + with open(file_path, "rb") as input: + hash = hashlib.md5() + for chunk in iter(lambda: input.read(4096), b""): + hash.update(chunk) + return hash.hexdigest() + + MESSAGE_ORPHANED = "missing " + MESSAGE_MODIFIED = "modified" + MESSAGE_DELETED = "DELETING" + + COLOR_WHITE = "0" + COLOR_RED = "31" + COLOR_GREEN = "32" + COLOR_YELLOW = "33" + + def log(self, message, file, color=COLOR_WHITE): + color_on = self._colors[color] + color_off = self._colors[NotebookAnalyser.COLOR_WHITE] + print("{}{}{}: {}".format(color_on, message, color_off, file)) + + def clean_checkpoints(self, directory): + for 
checkpoint_path in sorted(glob.glob(os.path.join(directory, NotebookAnalyser.CHECKPOINT_MASK))): + + workfile_dir = os.path.dirname(os.path.dirname(checkpoint_path)) + workfile_name = os.path.basename(checkpoint_path)[:-NotebookAnalyser.CHECKPOINT_MASK_LEN] + NotebookAnalyser.NOTEBOOK_SUFFIX + workfile_path = os.path.join(workfile_dir, workfile_name) + + status = "" + if not os.path.isfile(workfile_path): + if self._verbose: + self.log(NotebookAnalyser.MESSAGE_ORPHANED, workfile_path, NotebookAnalyser.COLOR_RED) + else: + checkpoint_stat = os.stat(checkpoint_path) + workfile_stat = os.stat(workfile_path) + + modified = workfile_stat.st_size != checkpoint_stat.st_size + + if not modified: + checkpoint_hash = NotebookAnalyser.get_hash(checkpoint_path) + workfile_hash = NotebookAnalyser.get_hash(workfile_path) + modified = checkpoint_hash != workfile_hash + + if modified: + if self._verbose: + self.log(NotebookAnalyser.MESSAGE_MODIFIED, workfile_path, NotebookAnalyser.COLOR_YELLOW) + else: + self.log(NotebookAnalyser.MESSAGE_DELETED, checkpoint_path, NotebookAnalyser.COLOR_GREEN) + if not self._dry_run: + os.remove(checkpoint_path) + + if not self._dry_run and not os.listdir(directory): + self.log(NotebookAnalyser.MESSAGE_DELETED, directory, NotebookAnalyser.COLOR_GREEN) + os.rmdir(directory) + + def clean_checkpoints_recursively(self, directory): + for (root, subdirs, files) in os.walk(directory): + subdirs.sort() # INFO: traverse alphabetically + if NotebookAnalyser.CHECKPOINT_DIR in subdirs: + subdirs.remove(NotebookAnalyser.CHECKPOINT_DIR) # INFO: don't recurse there + self.clean_checkpoints(os.path.join(root, NotebookAnalyser.CHECKPOINT_DIR)) + + +def main(): + import argparse + parser = argparse.ArgumentParser(description="Remove checkpointed versions of those jupyter notebooks that are identical to their working copies.", + epilog="""Notebooks will be reported as either + "DELETED" if the working copy and checkpointed version are identical + (checkpoint will be 
deleted), + "missing" if there is a checkpoint but no corresponding working file can be found + or "modified" if notebook and the checkpoint are not byte-to-byte identical. + If removal of checkpoints results in empty ".ipynb_checkpoints" directory + that directory is also deleted. + """) #, formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("dirs", metavar="DIR", type=str, nargs="*", default=".", help="directories to search") + parser.add_argument("-d", "--dry-run", action="store_true", help="only print messages, don't perform any removals") + parser.add_argument("-v", "--verbose", action="store_true", help="verbose mode") + parser.add_argument("-c", "--color", action="store_true", help="colorful mode") + args = parser.parse_args() + + analyser = NotebookAnalyser(args.dry_run, args.verbose, args.color) + for directory in args.dirs: + analyser.clean_checkpoints_recursively(directory) + +if __name__ == "__main__": + main() diff --git a/docker/bin/nbdiff_checkpoint b/docker/bin/nbdiff_checkpoint new file mode 100755 index 0000000..ffbb21c --- /dev/null +++ b/docker/bin/nbdiff_checkpoint @@ -0,0 +1,9 @@ +#!/bin/bash +DIRNAME=$(dirname "$1") +BASENAME=$(basename "$1" .ipynb) + +WORKING_COPY=$DIRNAME/$BASENAME.ipynb +CHECKPOINT_COPY=$DIRNAME/.ipynb_checkpoints/$BASENAME-checkpoint.ipynb + +echo "How change $CHECKPOINT_COPY into $WORKING_COPY" +nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details diff --git a/docker/bin/rm_empty_subdirs b/docker/bin/rm_empty_subdirs new file mode 100755 index 0000000..8734b84 --- /dev/null +++ b/docker/bin/rm_empty_subdirs @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +import os + +def remove_empty_directories(initial_dir, + allow_initial_delete=False, ignore_nonexistant_initial=False, + dry_run=False, quiet=False): + + FORBIDDEN_SUBDIRS = set([".git"]) + + if not os.path.isdir(initial_dir) and not ignore_nonexistant_initial: + raise RuntimeError("Initial directory '{}' not found!".format(initial_dir)) + + 
message = "removed" + if dry_run: + message = "to be " + message + + deleted = set() + + for (directory, subdirs, files) in os.walk(initial_dir, topdown=False): + forbidden = False + parent = directory + while parent: + parent, dirname = os.path.split(parent) + if dirname in FORBIDDEN_SUBDIRS: + forbidden = True + break + if forbidden: + continue + + is_empty = len(files) < 1 and len(set([os.path.join(directory, s) for s in subdirs]) - deleted) < 1 + + if is_empty and (initial_dir != directory or allow_initial_delete): + if not quiet: + print("{}: {}".format(message, directory)) + deleted.add(directory) + if not dry_run: + os.rmdir(directory) + +def main(): + import argparse + parser = argparse.ArgumentParser(description="Remove empty directories recursively in subtree.") + parser.add_argument("dir", metavar="DIR", type=str, nargs="*", default=".", help="directory to be searched") + parser.add_argument("-r", "--allow-dir-removal", action="store_true", help="allow deletion of DIR itself") + parser.add_argument("-i", "--ignore-nonexistent-dir", action="store_true", help="don't throw an error if DIR doesn't exist") + parser.add_argument("-d", "--dry-run", action="store_true", help="only print messages, don't perform any removals") + parser.add_argument("-q", "--quiet", action="store_true", help="don't print names of directories being removed") + args = parser.parse_args() + for directory in args.dir: + remove_empty_directories(directory, args.allow_dir_removal, args.ignore_nonexistent_dir, + args.dry_run, args.quiet) + +if __name__ == "__main__": + main() diff --git a/docker/bin/tensorboard b/docker/bin/tensorboard new file mode 100755 index 0000000..dd7294d --- /dev/null +++ b/docker/bin/tensorboard @@ -0,0 +1,2 @@ +#!/bin/bash +python -m tensorboard.main "$@" From 5bb9d6d3dfba750b7e0cbcfe26733b17e8685219 Mon Sep 17 00:00:00 2001 From: ziembla Date: Sun, 10 Dec 2017 18:38:25 +0100 Subject: [PATCH 08/12] help message for nbdiff_checkpoint --- 
docker/bin/nbdiff_checkpoint | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docker/bin/nbdiff_checkpoint b/docker/bin/nbdiff_checkpoint index ffbb21c..2969e1b 100755 --- a/docker/bin/nbdiff_checkpoint +++ b/docker/bin/nbdiff_checkpoint @@ -1,9 +1,16 @@ #!/bin/bash +if [ "$#" -ne 1 ]; then + echo "usage: nbdiff_checkpoint NOTEBOOK.ipynb" + echo + echo "Show differences between given jupyter notebook and its checkpointed version (in .ipynb_checkpoints subdirectory)" + exit +fi + DIRNAME=$(dirname "$1") BASENAME=$(basename "$1" .ipynb) WORKING_COPY=$DIRNAME/$BASENAME.ipynb CHECKPOINT_COPY=$DIRNAME/.ipynb_checkpoints/$BASENAME-checkpoint.ipynb -echo "How change $CHECKPOINT_COPY into $WORKING_COPY" +echo "----- Analysing how to change $CHECKPOINT_COPY into $WORKING_COPY -----" nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details From 30fef69ed026ee117db464766bd95af8c7df1d5e Mon Sep 17 00:00:00 2001 From: ziembla Date: Sun, 10 Dec 2017 18:18:33 +0000 Subject: [PATCH 09/12] rm_empty_subdirs changed to require explicit argument (defaulting to current dir withdrawn as potentially harmful) --- docker/bin/rm_empty_subdirs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/bin/rm_empty_subdirs b/docker/bin/rm_empty_subdirs index 8734b84..34f3ea9 100755 --- a/docker/bin/rm_empty_subdirs +++ b/docker/bin/rm_empty_subdirs @@ -40,7 +40,7 @@ def remove_empty_directories(initial_dir, def main(): import argparse parser = argparse.ArgumentParser(description="Remove empty directories recursively in subtree.") - parser.add_argument("dir", metavar="DIR", type=str, nargs="*", default=".", help="directory to be searched") + parser.add_argument("dir", metavar="DIR", type=str, nargs="+", help="directory to be searched") parser.add_argument("-r", "--allow-dir-removal", action="store_true", help="allow deletion of DIR itself") parser.add_argument("-i", "--ignore-nonexistent-dir", action="store_true", help="don't throw an error if DIR 
doesn't exist") parser.add_argument("-d", "--dry-run", action="store_true", help="only print messages, don't perform any removals") From 1d370f40016b2f9fa88b6486b5c3a726f7aac473 Mon Sep 17 00:00:00 2001 From: ziembla Date: Mon, 11 Dec 2017 06:52:17 +0100 Subject: [PATCH 10/12] nbdiff_checkpoint parameter parsing fixed --- docker/bin/nbdiff_checkpoint | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/bin/nbdiff_checkpoint b/docker/bin/nbdiff_checkpoint index 2969e1b..9ce7cd0 100755 --- a/docker/bin/nbdiff_checkpoint +++ b/docker/bin/nbdiff_checkpoint @@ -1,5 +1,5 @@ #!/bin/bash -if [ "$#" -ne 1 ]; then +if [[ "$#" -lt 1 || "$1" =~ ^((-h)|(--help))$ ]] ; then echo "usage: nbdiff_checkpoint NOTEBOOK.ipynb" echo echo "Show differences between given jupyter notebook and its checkpointed version (in .ipynb_checkpoints subdirectory)" @@ -8,9 +8,10 @@ fi DIRNAME=$(dirname "$1") BASENAME=$(basename "$1" .ipynb) +shift WORKING_COPY=$DIRNAME/$BASENAME.ipynb CHECKPOINT_COPY=$DIRNAME/.ipynb_checkpoints/$BASENAME-checkpoint.ipynb echo "----- Analysing how to change $CHECKPOINT_COPY into $WORKING_COPY -----" -nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details +nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details "$@" From 60bb0e4e502bdc711ca5d339b2e2d2692195c14c Mon Sep 17 00:00:00 2001 From: ziembla Date: Mon, 11 Dec 2017 16:19:24 +0100 Subject: [PATCH 11/12] Uncommentable section in Dockerfile to autosave .py and .html alongside .ipynb --- docker/Dockerfile | 13 ++++++++++--- docker/{bashrc => bashrc.bash} | 0 docker/jupyter_notebook_config.py | 15 +++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) rename docker/{bashrc => bashrc.bash} (100%) create mode 100644 docker/jupyter_notebook_config.py diff --git a/docker/Dockerfile b/docker/Dockerfile index adf97f1..2d24d04 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -65,10 +65,17 @@ USER ${username} WORKDIR ${workdir} -COPY docker/bashrc /tmp/bashrc -RUN cat 
/tmp/bashrc >> ${home}/.bashrc +COPY docker/bashrc.bash /tmp/ +RUN cat /tmp/bashrc.bash >> ${home}/.bashrc RUN echo "export PATH=\"${workdir}/docker/bin:$PATH\"" >> ${home}/.bashrc -RUN sudo rm /tmp/bashrc +RUN sudo rm /tmp/bashrc.bash + + +# INFO: Uncomment lines below to enable automatic save of python-only and html-only +# exports alongside the notebook +#COPY docker/jupyter_notebook_config.py /tmp/ +#RUN cat /tmp/jupyter_notebook_config.py >> ${home}/.jupyter/jupyter_notebook_config.py +#RUN sudo rm /tmp/jupyter_notebook_config.py # INFO: Uncomment the RUN command below to disable git diff paging #RUN git config --global core.pager '' diff --git a/docker/bashrc b/docker/bashrc.bash similarity index 100% rename from docker/bashrc rename to docker/bashrc.bash diff --git a/docker/jupyter_notebook_config.py b/docker/jupyter_notebook_config.py new file mode 100644 index 0000000..971a49a --- /dev/null +++ b/docker/jupyter_notebook_config.py @@ -0,0 +1,15 @@ +import os +import subprocess + +def export_script_and_view(model, os_path, contents_manager): + if model["type"] != "notebook": + return + dir_name, file_name = os.path.split(os_path) + file_base, file_ext = os.path.splitext(file_name) + if file_base.startswith("Untitled"): + return + export_name = file_base if file_ext == ".ipynb" else file_name + subprocess.check_call(["jupyter", "nbconvert", "--to", "script", file_name, "--output", export_name + "_script"], cwd=dir_name) + subprocess.check_call(["jupyter", "nbconvert", "--to", "html", file_name, "--output", export_name + "_view"], cwd=dir_name) + +c.FileContentsManager.post_save_hook = export_script_and_view From 9dfaa950d2091e7f37ddba996c68c60e79e05c3b Mon Sep 17 00:00:00 2001 From: ziembla Date: Mon, 11 Dec 2017 22:02:42 +0100 Subject: [PATCH 12/12] Dockerfile to spaces --- docker/Dockerfile | 30 +++++++++++++++--------------- docker/Makefile | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 
2d24d04..b4ec526 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,16 +1,16 @@ FROM continuumio/anaconda3 RUN apt-get update && apt-get upgrade -y \ - && apt-get install -y \ - libpq-dev \ - build-essential \ - git \ - sudo \ - && rm -rf /var/lib/apt/lists/* + && apt-get install -y \ + libpq-dev \ + build-essential \ + git \ + sudo \ + && rm -rf /var/lib/apt/lists/* RUN conda install -y -c conda-forge \ - tensorflow \ - jupyter_contrib_nbextensions + tensorflow \ + jupyter_contrib_nbextensions ARG username ARG userid @@ -19,8 +19,8 @@ ARG home=/home/${username} ARG workdir=${home}/handson-ml RUN adduser ${username} --uid ${userid} --gecos '' --disabled-password \ - && echo "${username} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${username} \ - && chmod 0440 /etc/sudoers.d/${username} + && echo "${username} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${username} \ + && chmod 0440 /etc/sudoers.d/${username} WORKDIR ${workdir} RUN chown ${username}:${username} ${workdir} @@ -57,9 +57,9 @@ COPY docker/nbdime-*.patch /tmp/ USER root WORKDIR / RUN patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \ - /tmp/nbdime-1-details.patch || true \ - && patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \ - /tmp/nbdime-2-toc.patch || true + /tmp/nbdime-1-details.patch || true \ + && patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \ + /tmp/nbdime-2-toc.patch || true RUN rm /tmp/nbdime-*.patch USER ${username} WORKDIR ${workdir} @@ -89,5 +89,5 @@ RUN sudo rm /tmp/bashrc.bash # passwd() # and take the hash from the output #RUN mkdir -p ${home}/.jupyter && \ -# echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \ -# >> ${home}/.jupyter/jupyter_notebook_config.py +# echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \ +# >> ${home}/.jupyter/jupyter_notebook_config.py diff --git a/docker/Makefile 
b/docker/Makefile index 6078fc9..f85c49a 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -4,7 +4,7 @@ help: run: docker-compose up exec: - docker-compose exec handson-ml /bin/bash + docker-compose exec handson-ml bash build: stop .FORCE docker-compose build rebuild: stop .FORCE