scripts for jupyter notebooks cleanup, bin subdir on path
parent
ddb9784176
commit
6e4004f16c
|
@ -67,6 +67,7 @@ WORKDIR ${workdir}
|
||||||
|
|
||||||
COPY docker/bashrc /tmp/bashrc
|
COPY docker/bashrc /tmp/bashrc
|
||||||
RUN cat /tmp/bashrc >> ${home}/.bashrc
|
RUN cat /tmp/bashrc >> ${home}/.bashrc
|
||||||
|
# Prepend the project's docker/bin directory to PATH for interactive shells.
# \$PATH is escaped so the literal text "$PATH" lands in .bashrc and is expanded
# when the shell starts, instead of freezing the build-time PATH into the file.
RUN echo "export PATH=\"${workdir}/docker/bin:\$PATH\"" >> ${home}/.bashrc
|
||||||
RUN sudo rm /tmp/bashrc
|
RUN sudo rm /tmp/bashrc
|
||||||
|
|
||||||
# INFO: Uncomment the RUN command below to disable git diff paging
|
# INFO: Uncomment the RUN command below to disable git diff paging
|
||||||
|
|
|
@ -1,18 +1,3 @@
|
||||||
alias ll="ls -alF"
|
alias ll="ls -alF"
|
||||||
|
alias nbd="nbdiff_checkpoint"
|
||||||
nbd() {
|
alias tb="tensorboard --logdir=tf_logs"
|
||||||
DIRNAME=$(dirname "$1")
|
|
||||||
BASENAME=$(basename "$1" .ipynb)
|
|
||||||
|
|
||||||
WORKING_COPY=$DIRNAME/$BASENAME.ipynb
|
|
||||||
CHECKPOINT_COPY=$DIRNAME/.ipynb_checkpoints/$BASENAME-checkpoint.ipynb
|
|
||||||
|
|
||||||
# echo "How change $CHECKPOINT_COPY into $WORKING_COPY"
|
|
||||||
nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details
|
|
||||||
}
|
|
||||||
|
|
||||||
tb() {
|
|
||||||
python -m tensorboard.main --logdir=tf_logs
|
|
||||||
}
|
|
||||||
|
|
||||||
alias tensorboard="python -m tensorboard.main"
|
|
||||||
|
|
|
@ -0,0 +1,116 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import collections
|
||||||
|
import glob
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
|
class NotebookAnalyser:
    """Delete Jupyter checkpoint files that are identical to their notebooks.

    A checkpoint ``DIR/.ipynb_checkpoints/NAME-checkpoint.ipynb`` is compared
    with its working copy ``DIR/NAME.ipynb``.  Identical checkpoints are
    removed; checkpoints whose notebook is missing or differs are kept (and
    reported in verbose mode).
    """

    NOTEBOOK_SUFFIX = ".ipynb"
    CHECKPOINT_DIR = NOTEBOOK_SUFFIX + "_checkpoints"
    CHECKPOINT_MASK = "*-checkpoint" + NOTEBOOK_SUFFIX
    # Length of "-checkpoint.ipynb", i.e. the mask without its leading "*".
    CHECKPOINT_MASK_LEN = len(CHECKPOINT_MASK) - 1

    MESSAGE_ORPHANED = "missing "
    MESSAGE_MODIFIED = "modified"
    MESSAGE_DELETED = "DELETING"

    # ANSI SGR codes; COLOR_WHITE ("0") is the reset code.
    COLOR_WHITE = "0"
    COLOR_RED = "31"
    COLOR_GREEN = "32"
    COLOR_YELLOW = "33"

    def __init__(self, dry_run=False, verbose=False, colorful=False):
        """Configure the analyser.

        dry_run  -- report what would be deleted without deleting anything
        verbose  -- also report missing and modified notebooks
        colorful -- wrap status messages in ANSI colour escape sequences
        """
        self._dry_run = dry_run
        self._verbose = verbose
        # Unknown colours map to "", so plain mode emits no escape sequences.
        self._colors = collections.defaultdict(lambda: "")
        if colorful:
            for color in (
                NotebookAnalyser.COLOR_WHITE,
                NotebookAnalyser.COLOR_RED,
                NotebookAnalyser.COLOR_GREEN,
                NotebookAnalyser.COLOR_YELLOW,
            ):
                self._colors[color] = "\033[{}m".format(color)

    @staticmethod
    def get_hash(file_path):
        """Return the hexadecimal MD5 digest of the file at *file_path*."""
        digest = hashlib.md5()
        with open(file_path, "rb") as stream:
            # Read in 4 KiB chunks so large notebooks are not loaded at once.
            for chunk in iter(lambda: stream.read(4096), b""):
                digest.update(chunk)
        return digest.hexdigest()

    @staticmethod
    def _is_modified(checkpoint_path, workfile_path):
        """Return True if the two files are not byte-for-byte identical."""
        # Cheap size comparison first; hash only when the sizes agree.
        if os.stat(workfile_path).st_size != os.stat(checkpoint_path).st_size:
            return True
        checkpoint_hash = NotebookAnalyser.get_hash(checkpoint_path)
        workfile_hash = NotebookAnalyser.get_hash(workfile_path)
        return checkpoint_hash != workfile_hash

    def log(self, message, file, color=COLOR_WHITE):
        """Print ``message: file`` with *message* wrapped in *color*."""
        color_on = self._colors[color]
        color_off = self._colors[NotebookAnalyser.COLOR_WHITE]
        print("{}{}{}: {}".format(color_on, message, color_off, file))

    def clean_checkpoints(self, directory):
        """Process one checkpoint directory.

        Checkpoints identical to their working copy are deleted (unless in
        dry-run mode).  If *directory* is empty afterwards it is removed too.
        """
        mask = os.path.join(directory, NotebookAnalyser.CHECKPOINT_MASK)
        for checkpoint_path in sorted(glob.glob(mask)):
            # The working copy lives in the parent of the checkpoint directory
            # and has the same name minus the "-checkpoint" marker.
            workfile_dir = os.path.dirname(os.path.dirname(checkpoint_path))
            checkpoint_name = os.path.basename(checkpoint_path)
            workfile_name = (
                checkpoint_name[:-NotebookAnalyser.CHECKPOINT_MASK_LEN]
                + NotebookAnalyser.NOTEBOOK_SUFFIX
            )
            workfile_path = os.path.join(workfile_dir, workfile_name)

            if not os.path.isfile(workfile_path):
                if self._verbose:
                    self.log(NotebookAnalyser.MESSAGE_ORPHANED, workfile_path,
                             NotebookAnalyser.COLOR_RED)
                continue

            if NotebookAnalyser._is_modified(checkpoint_path, workfile_path):
                if self._verbose:
                    self.log(NotebookAnalyser.MESSAGE_MODIFIED, workfile_path,
                             NotebookAnalyser.COLOR_YELLOW)
                continue

            self.log(NotebookAnalyser.MESSAGE_DELETED, checkpoint_path,
                     NotebookAnalyser.COLOR_GREEN)
            if not self._dry_run:
                os.remove(checkpoint_path)

        if not self._dry_run and not os.listdir(directory):
            self.log(NotebookAnalyser.MESSAGE_DELETED, directory,
                     NotebookAnalyser.COLOR_GREEN)
            os.rmdir(directory)

    def clean_checkpoints_recursively(self, directory):
        """Walk *directory* and clean every checkpoint directory found."""
        for root, subdirs, _files in os.walk(directory):
            subdirs.sort()  # INFO: traverse alphabetically
            if NotebookAnalyser.CHECKPOINT_DIR in subdirs:
                # INFO: don't recurse there
                subdirs.remove(NotebookAnalyser.CHECKPOINT_DIR)
                self.clean_checkpoints(
                    os.path.join(root, NotebookAnalyser.CHECKPOINT_DIR))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the command line and clean checkpoints under each given directory."""
    import argparse

    # The default help formatter re-wraps the epilog, so its line breaks are
    # only for source readability.
    parser = argparse.ArgumentParser(
        description="Remove checkpointed versions of those jupyter notebooks that are identical to their working copies.",
        epilog="""Notebooks will be reported as either
        "DELETING" if the working copy and checkpointed version are identical
        (checkpoint will be deleted),
        "missing" if there is a checkpoint but no corresponding working file can be found
        or "modified" if notebook and the checkpoint are not byte-to-byte identical.
        If removal of checkpoints results in empty ".ipynb_checkpoints" directory
        that directory is also deleted.
        """)
    # The default must be a list: with nargs="*" a bare string default would be
    # iterated character by character in the loop below.
    parser.add_argument("dirs", metavar="DIR", type=str, nargs="*", default=["."], help="directories to search")
    parser.add_argument("-d", "--dry-run", action="store_true", help="only print messages, don't perform any removals")
    parser.add_argument("-v", "--verbose", action="store_true", help="verbose mode")
    parser.add_argument("-c", "--color", action="store_true", help="colorful mode")
    args = parser.parse_args()

    analyser = NotebookAnalyser(args.dry_run, args.verbose, args.color)
    for directory in args.dirs:
        analyser.clean_checkpoints_recursively(directory)


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,9 @@
|
||||||
|
#!/bin/bash
# Diff a Jupyter notebook's checkpoint against its working copy using nbdiff.
#
# Usage: <this script> NOTEBOOK[.ipynb]
#
# The checkpoint is looked up at
#   <dir of NOTEBOOK>/.ipynb_checkpoints/<name>-checkpoint.ipynb
# and passed to nbdiff together with the working copy and --ignore-details.

# Without an argument the paths below would be meaningless; fail early instead.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    echo "Usage: $(basename "$0") NOTEBOOK[.ipynb]" >&2
    exit 2
fi

DIRNAME="$(dirname "$1")"
# Strip an optional .ipynb suffix so the argument may be given with or without it.
BASENAME="$(basename "$1" .ipynb)"

WORKING_COPY="$DIRNAME/$BASENAME.ipynb"
CHECKPOINT_COPY="$DIRNAME/.ipynb_checkpoints/$BASENAME-checkpoint.ipynb"

echo "How change $CHECKPOINT_COPY into $WORKING_COPY"
nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details
|
|
@ -0,0 +1,54 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
def _has_forbidden_component(path, forbidden):
    """Return True if any component of *path* is named in *forbidden*."""
    while True:
        head, tail = os.path.split(path)
        if tail in forbidden:
            return True
        # os.path.split() returns an empty head once a relative path is
        # exhausted, and returns a root unchanged ("/" -> ("/", "")).  Stop in
        # both cases; looping on a truthy head alone never ends at a root.
        if not head or head == path:
            return False
        path = head


def remove_empty_directories(initial_dir,
                             allow_initial_delete=False, ignore_nonexistant_initial=False,
                             dry_run=False, quiet=False):
    """Remove empty directories below *initial_dir*, deepest first.

    A directory counts as empty when it holds no files and all of its
    subdirectories were themselves removed (or, in a dry run, would be).
    Directories with a ".git" path component are never touched.

    initial_dir                -- root of the subtree to clean
    allow_initial_delete       -- also remove *initial_dir* if it ends up empty
    ignore_nonexistant_initial -- do nothing instead of raising when
                                  *initial_dir* is not a directory
    dry_run                    -- only report, don't remove anything
    quiet                      -- don't print the affected directories

    Raises RuntimeError if *initial_dir* is not a directory and
    *ignore_nonexistant_initial* is false.
    """
    FORBIDDEN_SUBDIRS = frozenset([".git"])

    if not os.path.isdir(initial_dir) and not ignore_nonexistant_initial:
        raise RuntimeError("Initial directory '{}' not found!".format(initial_dir))

    message = "removed"
    if dry_run:
        message = "to be " + message

    # Directories removed so far (or marked for removal in a dry run); a parent
    # is empty if nothing but these is left in it.
    deleted = set()

    # Bottom-up walk, so children are handled before their parents.
    for directory, subdirs, files in os.walk(initial_dir, topdown=False):
        if _has_forbidden_component(directory, FORBIDDEN_SUBDIRS):
            continue

        is_empty = not files and all(
            os.path.join(directory, subdir) in deleted for subdir in subdirs)

        if is_empty and (initial_dir != directory or allow_initial_delete):
            if not quiet:
                print("{}: {}".format(message, directory))
            deleted.add(directory)
            if not dry_run:
                os.rmdir(directory)
|
||||||
|
|
||||||
|
def main():
    """Parse the command line and remove empty directories under each DIR."""
    import argparse

    parser = argparse.ArgumentParser(description="Remove empty directories recursively in subtree.")
    # The default must be a list: with nargs="*" a bare string default would be
    # iterated character by character in the loop below.
    parser.add_argument("dir", metavar="DIR", type=str, nargs="*", default=["."], help="directory to be searched")
    parser.add_argument("-r", "--allow-dir-removal", action="store_true", help="allow deletion of DIR itself")
    parser.add_argument("-i", "--ignore-nonexistent-dir", action="store_true", help="don't throw an error if DIR doesn't exist")
    parser.add_argument("-d", "--dry-run", action="store_true", help="only print messages, don't perform any removals")
    parser.add_argument("-q", "--quiet", action="store_true", help="don't print names of directories being removed")
    args = parser.parse_args()

    for directory in args.dir:
        remove_empty_directories(directory, args.allow_dir_removal, args.ignore_nonexistent_dir,
                                 args.dry_run, args.quiet)


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,2 @@
|
||||||
|
#!/bin/bash
# Run TensorBoard as a Python module, forwarding all arguments unchanged.
# exec replaces this shell with the python process, so signals and the exit
# status go straight to/from TensorBoard instead of through a wrapper shell.
exec python -m tensorboard.main "$@"
|
Loading…
Reference in New Issue