commit
5a7dc7901a
|
@ -0,0 +1 @@
|
|||
COMPOSE_PROJECT_NAME=handson-ml
|
|
@ -9,20 +9,21 @@ RUN apt-get update && apt-get upgrade -y \
|
|||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN conda install -y -c conda-forge \
|
||||
tensorflow=1.0.0 \
|
||||
tensorflow \
|
||||
jupyter_contrib_nbextensions
|
||||
|
||||
ARG username
|
||||
ARG userid
|
||||
|
||||
ARG home=/home/${username}
|
||||
ARG workdir=${home}/handson-ml
|
||||
|
||||
RUN adduser ${username} --uid ${userid} --gecos '' --disabled-password \
|
||||
&& echo "${username} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${username} \
|
||||
&& chmod 0440 /etc/sudoers.d/${username}
|
||||
|
||||
ENV HOME /home/${username}
|
||||
|
||||
WORKDIR ${HOME}/handson-ml
|
||||
RUN chown ${username}:${username} ${HOME}/handson-ml
|
||||
WORKDIR ${workdir}
|
||||
RUN chown ${username}:${username} ${workdir}
|
||||
|
||||
USER ${username}
|
||||
|
||||
|
@ -30,6 +31,55 @@ RUN jupyter contrib nbextension install --user
|
|||
RUN jupyter nbextension enable toc2/main
|
||||
|
||||
|
||||
# INFO: Jupyter and nbdime extension are not totally integrated (anaconda image is py36,
|
||||
# nbdime checks for py35 at the moment); still, the config below enables diffing
|
||||
# notebooks with nbdiff (and nbdiff support in git diff command) after connecting
|
||||
# to the container by "make exec" (or "docker-compose exec handson-ml bash")
|
||||
# You may also try running:
|
||||
# nbd NOTEBOOK_NAME.ipynb
|
||||
# to get nbdiff between checkpointed version and current version of the given notebook
|
||||
USER root
|
||||
WORKDIR /
|
||||
RUN conda install -y -c conda-forge nbdime
|
||||
USER ${username}
|
||||
WORKDIR ${workdir}
|
||||
|
||||
RUN git-nbdiffdriver config --enable --global
|
||||
|
||||
# INFO: Optionally uncomment any (one) of the following RUN commands below to ignore either
|
||||
# metadata or details in nbdiff within git diff
|
||||
#RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-metadata'
|
||||
RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-details'
|
||||
|
||||
|
||||
# INFO: Dirty nbdime patching (ignored if not matching)
|
||||
COPY docker/nbdime-*.patch /tmp/
|
||||
USER root
|
||||
WORKDIR /
|
||||
RUN patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \
|
||||
/tmp/nbdime-1-details.patch || true \
|
||||
&& patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \
|
||||
/tmp/nbdime-2-toc.patch || true
|
||||
RUN rm /tmp/nbdime-*.patch
|
||||
USER ${username}
|
||||
WORKDIR ${workdir}
|
||||
|
||||
|
||||
COPY docker/bashrc.bash /tmp/
|
||||
RUN cat /tmp/bashrc.bash >> ${home}/.bashrc
|
||||
RUN echo "export PATH=\"${workdir}/docker/bin:$PATH\"" >> ${home}/.bashrc
|
||||
RUN sudo rm /tmp/bashrc.bash
|
||||
|
||||
|
||||
# INFO: Uncomment lines below to enable automatic save of python-only and html-only
|
||||
# exports alongside the notebook
|
||||
#COPY docker/jupyter_notebook_config.py /tmp/
|
||||
#RUN cat /tmp/jupyter_notebook_config.py >> ${home}/.jupyter/jupyter_notebook_config.py
|
||||
#RUN sudo rm /tmp/jupyter_notebook_config.py
|
||||
|
||||
# INFO: Uncomment the RUN command below to disable git diff paging
|
||||
#RUN git config --global core.pager ''
|
||||
|
||||
# INFO: Uncomment the RUN command below for easy and constant notebook URL (just localhost:8888)
|
||||
# That will switch jupyter to using empty password instead of a token.
|
||||
# To avoid making a security hole you SHOULD in fact not only uncomment but
|
||||
|
@ -38,34 +88,6 @@ RUN jupyter nbextension enable toc2/main
|
|||
# from notebook.auth import passwd
|
||||
# passwd()
|
||||
# and take the hash from the output
|
||||
#RUN mkdir -p ${HOME}/.jupyter && \
|
||||
#RUN mkdir -p ${home}/.jupyter && \
|
||||
# echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \
|
||||
# >> ${HOME}/.jupyter/jupyter_notebook_config.py
|
||||
|
||||
# INFO: Uncomment the RUN command below to disable git diff paging
|
||||
#RUN git config --global core.pager ''
|
||||
|
||||
|
||||
# INFO: Below - work in progress, nbdime not totally integrated, still it enables diffing
|
||||
# notebooks with nbdiff (and nbdiff support in git diff command) after connecting to
|
||||
# the container by "make exec" (docker exec)
|
||||
# Try:
|
||||
# nbd NOTEBOOK_NAME.ipynb
|
||||
# to get nbdiff between checkpointed version and current version of the given notebook
|
||||
USER root
|
||||
WORKDIR /
|
||||
|
||||
RUN conda install -y -c conda-forge nbdime
|
||||
|
||||
USER ${username}
|
||||
WORKDIR ${HOME}/handson-ml
|
||||
|
||||
RUN git-nbdiffdriver config --enable --global
|
||||
|
||||
# INFO: Uncomment the RUN command below to ignore metadata in nbdiff within git diff
|
||||
#RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-metadata'
|
||||
|
||||
|
||||
COPY docker/bashrc /tmp/bashrc
|
||||
RUN cat /tmp/bashrc >> ${HOME}/.bashrc
|
||||
RUN sudo rm -rf /tmp/bashrc
|
||||
# >> ${home}/.jupyter/jupyter_notebook_config.py
|
||||
|
|
|
@ -4,7 +4,7 @@ help:
|
|||
run:
|
||||
docker-compose up
|
||||
exec:
|
||||
docker-compose exec handson-ml /bin/bash
|
||||
docker-compose exec handson-ml bash
|
||||
build: stop .FORCE
|
||||
docker-compose build
|
||||
rebuild: stop .FORCE
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
# Hands-on Machine Learning in Docker :-)
|
||||
# Hands-on Machine Learning in Docker
|
||||
|
||||
This is the Docker configuration which allows you to run and tweak the book's notebooks without installing any dependencies on your machine!<br/>
|
||||
OK, any except `docker`. With `docker-compose`. Well, you may also want `make` (but it is only used as a thin layer to call a few simple `docker-compose` commands).
|
||||
|
@ -32,7 +32,9 @@ You can close the server just by pressing `Ctrl-C` in terminal window.
|
|||
|
||||
Run `make exec` (or `docker-compose exec handson-ml bash`) while the server is running to run an additional `bash` shell inside the `handson-ml` container. Now you're inside the environment prepared within the image.
|
||||
|
||||
One of the useful things that can be done there may be comparing versions of the notebooks using the `nbdiff` command if you haven't got `nbdime` installed locally (it is **way** better than plain `diff` for notebooks). See [Tools for diffing and merging of Jupyter notebooks](https://github.com/jupyter/nbdime) for more details.
|
||||
One of the useful things that can be done there would be starting TensorBoard (for example with simple `tb` command, see bashrc file).
|
||||
|
||||
Another one may be comparing versions of the notebooks using the `nbdiff` command if you haven't got `nbdime` installed locally (it is **way** better than plain `diff` for notebooks). See [Tools for diffing and merging of Jupyter notebooks](https://github.com/jupyter/nbdime) for more details.
|
||||
|
||||
You can see changes you made relative to the version in git using `git diff` which is integrated with `nbdiff`.
|
||||
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
alias ll="ls -l"
|
||||
|
||||
# nbd NOTEBOOK[.ipynb]
#
# Show the nbdiff between the checkpointed version of a notebook (stored in
# the ".ipynb_checkpoints" directory next to it) and its working copy.
# The ".ipynb" suffix of the argument is optional.
nbd() {
    # This function is sourced into interactive shells, so keep its helper
    # variables local instead of leaking them into the user's session.
    local nb_dir nb_name working_copy checkpoint_copy

    nb_dir=$(dirname "$1")
    nb_name=$(basename "$1" .ipynb)

    working_copy="${nb_dir}/${nb_name}.ipynb"
    checkpoint_copy="${nb_dir}/.ipynb_checkpoints/${nb_name}-checkpoint.ipynb"

    # Checkpoint first, working copy second: the diff reads as
    # "how to change the checkpoint into the working copy".
    nbdiff "$checkpoint_copy" "$working_copy"
}
|
|
@ -0,0 +1,3 @@
|
|||
alias ll="ls -alF"
|
||||
alias nbd="nbdiff_checkpoint"
|
||||
alias tb="tensorboard --logdir=tf_logs"
|
|
@ -0,0 +1,116 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import collections
|
||||
import glob
|
||||
import hashlib
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
|
||||
class NotebookAnalyser:
    """Remove Jupyter checkpoints that are identical to their working notebooks.

    A checkpoint ``DIR/.ipynb_checkpoints/NAME-checkpoint.ipynb`` is compared
    with the working copy ``DIR/NAME.ipynb``.  Identical checkpoints are
    deleted (unless running in dry-run mode); modified and orphaned ones are
    only reported, and only in verbose mode.
    """

    NOTEBOOK_SUFFIX = ".ipynb"
    CHECKPOINT_DIR = NOTEBOOK_SUFFIX + "_checkpoints"
    CHECKPOINT_MASK = "*-checkpoint" + NOTEBOOK_SUFFIX
    # Length of "-checkpoint.ipynb", i.e. the mask without its leading "*".
    CHECKPOINT_MASK_LEN = len(CHECKPOINT_MASK) - 1

    MESSAGE_ORPHANED = "missing "
    MESSAGE_MODIFIED = "modified"
    MESSAGE_DELETED = "DELETING"

    # ANSI SGR codes; "0" resets attributes and doubles as the default color.
    COLOR_WHITE = "0"
    COLOR_RED = "31"
    COLOR_GREEN = "32"
    COLOR_YELLOW = "33"

    def __init__(self, dry_run=False, verbose=False, colorful=False):
        """Configure the analyser.

        dry_run  -- only print messages, never delete anything.
        verbose  -- also report modified and orphaned checkpoints.
        colorful -- wrap the message label in ANSI color escape sequences.
        """
        self._dry_run = dry_run
        self._verbose = verbose
        # Unknown colors map to "" so log() emits no escape sequences unless
        # colorful mode fills the table in below.
        self._colors = collections.defaultdict(str)
        if colorful:
            for color in (
                NotebookAnalyser.COLOR_WHITE,
                NotebookAnalyser.COLOR_RED,
                NotebookAnalyser.COLOR_GREEN,
                NotebookAnalyser.COLOR_YELLOW,
            ):
                self._colors[color] = "\033[{}m".format(color)

    @staticmethod
    def get_hash(file_path):
        """Return the hexadecimal MD5 digest of the file at *file_path*.

        The file is read in 4096-byte chunks so large notebooks are never
        loaded into memory at once.
        """
        digest = hashlib.md5()
        with open(file_path, "rb") as stream:
            for chunk in iter(lambda: stream.read(4096), b""):
                digest.update(chunk)
        return digest.hexdigest()

    @staticmethod
    def _is_modified(checkpoint_path, workfile_path):
        """Return True if the two files differ byte-wise."""
        # Cheap size comparison first; hash only when the sizes match.
        if os.stat(workfile_path).st_size != os.stat(checkpoint_path).st_size:
            return True
        checkpoint_hash = NotebookAnalyser.get_hash(checkpoint_path)
        workfile_hash = NotebookAnalyser.get_hash(workfile_path)
        return checkpoint_hash != workfile_hash

    def log(self, message, file, color=COLOR_WHITE):
        """Print "<message>: <file>", coloring the message if colors are on."""
        color_on = self._colors[color]
        color_off = self._colors[NotebookAnalyser.COLOR_WHITE]
        print("{}{}{}: {}".format(color_on, message, color_off, file))

    def clean_checkpoints(self, directory):
        """Process every checkpoint found directly in *directory*.

        *directory* is a checkpoint directory; working copies are looked up in
        its parent.  If the directory is empty afterwards (and this is not a
        dry run) the directory itself is removed as well.
        """
        cls = NotebookAnalyser
        # Escape the directory part: a path containing glob metacharacters
        # such as "[", "*" or "?" would otherwise match nothing (or the wrong
        # files) and its checkpoints would be silently skipped.
        pattern = os.path.join(glob.escape(directory), cls.CHECKPOINT_MASK)

        for checkpoint_path in sorted(glob.glob(pattern)):
            workfile_dir = os.path.dirname(os.path.dirname(checkpoint_path))
            checkpoint_name = os.path.basename(checkpoint_path)
            # Strip the "-checkpoint.ipynb" tail and put ".ipynb" back.
            workfile_name = checkpoint_name[:-cls.CHECKPOINT_MASK_LEN] + cls.NOTEBOOK_SUFFIX
            workfile_path = os.path.join(workfile_dir, workfile_name)

            if not os.path.isfile(workfile_path):
                if self._verbose:
                    self.log(cls.MESSAGE_ORPHANED, workfile_path, cls.COLOR_RED)
            elif cls._is_modified(checkpoint_path, workfile_path):
                if self._verbose:
                    self.log(cls.MESSAGE_MODIFIED, workfile_path, cls.COLOR_YELLOW)
            else:
                self.log(cls.MESSAGE_DELETED, checkpoint_path, cls.COLOR_GREEN)
                if not self._dry_run:
                    os.remove(checkpoint_path)

        if not self._dry_run and not os.listdir(directory):
            self.log(cls.MESSAGE_DELETED, directory, cls.COLOR_GREEN)
            os.rmdir(directory)

    def clean_checkpoints_recursively(self, directory):
        """Walk *directory* and clean every checkpoint directory found in it."""
        for (root, subdirs, files) in os.walk(directory):
            subdirs.sort()  # INFO: traverse alphabetically
            if NotebookAnalyser.CHECKPOINT_DIR in subdirs:
                # INFO: don't recurse there, it is handled right here
                subdirs.remove(NotebookAnalyser.CHECKPOINT_DIR)
                self.clean_checkpoints(os.path.join(root, NotebookAnalyser.CHECKPOINT_DIR))
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: clean checkpoints under the given directories.

    Parses the command line, builds a NotebookAnalyser from the flags and runs
    it recursively on every DIR argument (the current directory if none is
    given).
    """
    import argparse

    # The default help formatter re-wraps the epilog, so its line breaks and
    # indentation here do not affect the rendered help text.
    parser = argparse.ArgumentParser(
        description="Remove checkpointed versions of those jupyter notebooks that are identical to their working copies.",
        epilog="""Notebooks will be reported as either
                  "DELETING" if the working copy and checkpointed version are identical
                  (checkpoint will be deleted),
                  "missing" if there is a checkpoint but no corresponding working file can be found
                  or "modified" if notebook and the checkpoint are not byte-to-byte identical.
                  If removal of checkpoints results in empty ".ipynb_checkpoints" directory
                  that directory is also deleted.
                  """)
    # The default must be a list: with nargs="*" a bare string default would be
    # iterated character by character in the loop below.
    parser.add_argument("dirs", metavar="DIR", type=str, nargs="*", default=["."], help="directories to search")
    parser.add_argument("-d", "--dry-run", action="store_true", help="only print messages, don't perform any removals")
    parser.add_argument("-v", "--verbose", action="store_true", help="verbose mode")
    parser.add_argument("-c", "--color", action="store_true", help="colorful mode")
    args = parser.parse_args()

    analyser = NotebookAnalyser(args.dry_run, args.verbose, args.color)
    for directory in args.dirs:
        analyser.clean_checkpoints_recursively(directory)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
#
# nbdiff_checkpoint NOTEBOOK.ipynb [NBDIFF_OPTIONS...]
#
# Show the nbdiff between the checkpointed version of a notebook (kept in the
# ".ipynb_checkpoints" directory next to it) and its working copy.  Any
# arguments after the notebook name are forwarded to nbdiff unchanged.

usage() {
    echo "usage: nbdiff_checkpoint NOTEBOOK.ipynb"
    echo
    echo "Show differences between given jupyter notebook and its checkpointed version (in .ipynb_checkpoints subdirectory)"
}

# A missing argument is a usage error: report it on stderr with a non-zero
# status so that callers can detect the misuse.
if [[ "$#" -lt 1 ]] ; then
    usage >&2
    exit 1
fi

# An explicit help request is a success.
if [[ "$1" =~ ^((-h)|(--help))$ ]] ; then
    usage
    exit 0
fi

DIRNAME=$(dirname "$1")
BASENAME=$(basename "$1" .ipynb)
# Drop the notebook name so that "$@" holds only the extra nbdiff options.
shift

WORKING_COPY="$DIRNAME/$BASENAME.ipynb"
CHECKPOINT_COPY="$DIRNAME/.ipynb_checkpoints/$BASENAME-checkpoint.ipynb"

echo "----- Analysing how to change $CHECKPOINT_COPY into $WORKING_COPY -----"
nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details "$@"
|
|
@ -0,0 +1,54 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
|
||||
def _is_inside_forbidden_dir(path, forbidden_names):
    """Return True if any component of *path* is named in *forbidden_names*."""
    while True:
        parent, name = os.path.split(path)
        if name in forbidden_names:
            return True
        # os.path.split() returns a filesystem root (e.g. "/") unchanged and
        # yields "" once a relative path is exhausted; stop in both cases,
        # otherwise an absolute path would loop forever.
        if not parent or parent == path:
            return False
        path = parent


def remove_empty_directories(initial_dir,
                             allow_initial_delete=False, ignore_nonexistant_initial=False,
                             dry_run=False, quiet=False):
    """Remove empty directories below *initial_dir*, deepest first.

    A directory counts as empty when it holds no files and all of its
    subdirectories have themselves been removed (or, in a dry run, would have
    been).  Directories that are, or live inside, a ".git" directory are never
    touched.

    initial_dir                -- root of the subtree to clean.
    allow_initial_delete       -- also remove *initial_dir* itself if it ends up empty.
    ignore_nonexistant_initial -- do nothing instead of raising when
                                  *initial_dir* is not a directory.
    dry_run                    -- only report what would be removed.
    quiet                      -- do not print the affected directories.

    Raises RuntimeError if *initial_dir* is not an existing directory and
    *ignore_nonexistant_initial* is false.
    """
    forbidden_subdirs = {".git"}

    if not os.path.isdir(initial_dir) and not ignore_nonexistant_initial:
        raise RuntimeError("Initial directory '{}' not found!".format(initial_dir))

    message = "to be removed" if dry_run else "removed"

    # Directories removed so far (or marked for removal in a dry run).  Needed
    # because the bottom-up walk still lists already-removed children.
    deleted = set()

    for (directory, subdirs, files) in os.walk(initial_dir, topdown=False):
        if _is_inside_forbidden_dir(directory, forbidden_subdirs):
            continue

        remaining_subdirs = {os.path.join(directory, s) for s in subdirs} - deleted
        is_empty = not files and not remaining_subdirs

        if is_empty and (initial_dir != directory or allow_initial_delete):
            if not quiet:
                print("{}: {}".format(message, directory))
            deleted.add(directory)
            if not dry_run:
                os.rmdir(directory)
|
||||
|
||||
def main():
    """Command-line entry point: remove empty directories under each DIR.

    Every DIR given on the command line is processed in order with the
    options selected by the flags.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Remove empty directories recursively in subtree.")
    cli.add_argument("dir", metavar="DIR", type=str, nargs="+", help="directory to be searched")

    # (short flag, long flag, help text) for each boolean switch, in help order.
    switches = (
        ("-r", "--allow-dir-removal", "allow deletion of DIR itself"),
        ("-i", "--ignore-nonexistent-dir", "don't throw an error if DIR doesn't exist"),
        ("-d", "--dry-run", "only print messages, don't perform any removals"),
        ("-q", "--quiet", "don't print names of directories being removed"),
    )
    for short_flag, long_flag, help_text in switches:
        cli.add_argument(short_flag, long_flag, action="store_true", help=help_text)

    options = cli.parse_args()

    for target in options.dir:
        remove_empty_directories(
            target,
            options.allow_dir_removal,
            options.ignore_nonexistent_dir,
            options.dry_run,
            options.quiet,
        )
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,2 @@
|
|||
#!/bin/bash
|
||||
# Run the tensorboard.main module with the `python` found on PATH,
# forwarding all command-line arguments unchanged.
python -m tensorboard.main "$@"
|
|
@ -15,6 +15,7 @@ services:
|
|||
max-size: 50m
|
||||
ports:
|
||||
- "8888:8888"
|
||||
- "6006:6006"
|
||||
volumes:
|
||||
- ../:/home/devel/handson-ml
|
||||
command: /opt/conda/bin/jupyter notebook --ip='*' --port=8888 --no-browser
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
import os
|
||||
import subprocess
|
||||
|
||||
def export_script_and_view(model, os_path, contents_manager):
    """Export a saved notebook as a script and as an HTML view next to it.

    Has the (model, os_path, contents_manager) signature of a save hook.
    Does nothing for models that are not notebooks and for files whose name
    starts with "Untitled".  Otherwise runs ``jupyter nbconvert`` twice in the
    notebook's directory, producing "<name>_script" and "<name>_view" outputs.

    Raises subprocess.CalledProcessError if a conversion exits with a
    non-zero status.
    """
    is_notebook = model["type"] == "notebook"
    if not is_notebook:
        return

    folder, notebook = os.path.split(os_path)
    stem, extension = os.path.splitext(notebook)
    if stem.startswith("Untitled"):
        return

    # Drop the ".ipynb" extension from the output name; keep any other name whole.
    if extension == ".ipynb":
        target = stem
    else:
        target = notebook

    # Script export first, then HTML; a failing conversion aborts the rest.
    for export_format, suffix in (("script", "_script"), ("html", "_view")):
        command = ["jupyter", "nbconvert", "--to", export_format, notebook, "--output", target + suffix]
        subprocess.check_call(command, cwd=folder)
|
||||
|
||||
# Register the export function as the post-save hook of FileContentsManager.
# NOTE(review): `c` is not defined in this snippet; presumably it is the config
# object available when this code is appended to jupyter_notebook_config.py.
c.FileContentsManager.post_save_hook = export_script_and_view
|
|
@ -0,0 +1,17 @@
|
|||
--- a/nbdime/diffing/notebooks.py
|
||||
+++ b/nbdime/diffing/notebooks.py
|
||||
@@ -548,8 +548,12 @@ def set_notebook_diff_targets(sources=True, outputs=True, attachments=True, meta
|
||||
metadata_keys = ("/cells/*/metadata", "/metadata", "/cells/*/outputs/*/metadata")
|
||||
if metadata:
|
||||
for key in metadata_keys:
|
||||
- if key in notebook_differs:
|
||||
- del notebook_differs[key]
|
||||
+ if details:
|
||||
+ if key in notebook_differs:
|
||||
+ del notebook_differs[key]
|
||||
+ else:
|
||||
+ notebook_differs[key] = diff_ignore_keys(
|
||||
+ inner_differ=diff, ignore_keys=['collapsed', 'autoscroll', 'deletable', 'editable'])
|
||||
else:
|
||||
for key in metadata_keys:
|
||||
notebook_differs[key] = diff_ignore
|
|
@ -0,0 +1,11 @@
|
|||
--- a/nbdime/diffing/notebooks.py
|
||||
+++ b/nbdime/diffing/notebooks.py
|
||||
@@ -553,7 +553,7 @@
|
||||
del notebook_differs[key]
|
||||
else:
|
||||
notebook_differs[key] = diff_ignore_keys(
|
||||
- inner_differ=diff, ignore_keys=['collapsed', 'autoscroll', 'deletable', 'editable'])
|
||||
+ inner_differ=diff, ignore_keys=['toc', 'collapsed', 'autoscroll', 'deletable', 'editable'])
|
||||
else:
|
||||
for key in metadata_keys:
|
||||
notebook_differs[key] = diff_ignore
|
Loading…
Reference in New Issue