# handson-ml/docker/Dockerfile.gpu

# 204 lines
# 8.4 KiB
# Docker

# This Dockerfile includes sections from tensorflow/tensorflow:latest-gpu's Dockerfile:
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile
# and sections from continuumio/miniconda3:latest's Dockerfile:
# https://github.com/ContinuumIO/docker-images/blob/master/miniconda3/debian/Dockerfile
# First we need CUDA and everything else needed to support GPUs
###############################################
#### FROM tensorflow/tensorflow:latest-gpu ####
###############################################
# Build arguments consumed by the FROM line below; override with --build-arg.
# ARCH is empty by default; when set, "-<ARCH>" is appended to the image name.
ARG UBUNTU_VERSION=20.04
ARG ARCH=
ARG CUDA=11.2
# The keyword is spelled AS (upper case) so its casing matches FROM.
FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.1-base-ubuntu${UBUNTU_VERSION} AS base
# ARCH and CUDA are specified again because the FROM directive resets ARGs
# (but their default value is retained if set previously)
ARG ARCH
ARG CUDA
# Pinned cuDNN / TensorRT (libnvinfer) package versions used by the apt
# installs in this stage.
ARG CUDNN=8.1.0.77-1
ARG CUDNN_MAJOR_VERSION=8
ARG LIB_DIR_PREFIX=x86_64
ARG LIBNVINFER=7.2.2-1
ARG LIBNVINFER_MAJOR_VERSION=7
# Let us install tzdata painlessly.
# Declared as ARG (not ENV): the value is still exported to every RUN in this
# stage, but it is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
# Needed for string substitution (bash-only expansions such as ${CUDA/./-})
SHELL ["/bin/bash", "-c"]
# Pick up some TF dependencies
# [HOML3] Tweaked for handson-ml3: added all the libs before build-essentials
# and call apt clean + delete apt cache files.
# UBUNTU_VERSION is re-declared because FROM reset it (its default is kept).
# It selects the NVIDIA repository matching the base image's Ubuntu release
# (20.04 -> ubuntu2004) when fetching the repository signing key.
ARG UBUNTU_VERSION
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION/./}/x86_64/3bf863cc.pub && \
    apt-get update && apt-get install -y --no-install-recommends \
        git \
        protobuf-compiler \
        sudo \
        wget \
        build-essential \
        cuda-command-line-tools-${CUDA/./-} \
        libcublas-${CUDA/./-} \
        cuda-nvrtc-${CUDA/./-} \
        libcufft-${CUDA/./-} \
        libcurand-${CUDA/./-} \
        libcusolver-${CUDA/./-} \
        libcusparse-${CUDA/./-} \
        curl \
        libcudnn${CUDNN_MAJOR_VERSION}=${CUDNN}+cuda${CUDA} \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libzmq3-dev \
        pkg-config \
        software-properties-common \
        unzip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# TensorRT (libnvinfer) is skipped when ARCH is ppc64le. For any other value
# the NVIDIA machine-learning apt repository is registered and the pinned
# libnvinfer packages are installed, cleaning the apt caches in the same layer.
# NOTE: the libnvinfer packages are pinned to their +cuda11.0 builds
RUN if [[ "${ARCH}" != "ppc64le" ]]; then \
        apt-get update && \
        apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub && \
        echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/tensorRT.list && \
        apt-get update && \
        apt-get install -y --no-install-recommends \
            libnvinfer${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.0 \
            libnvinfer-plugin${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.0 && \
        apt-get clean && \
        rm -rf /var/lib/apt/lists/*; \
    fi
# For CUDA profiling, TensorFlow requires CUPTI.
# Written in key=value form; the space-separated ENV form is deprecated.
# NOTE(review): the first entry is hard-coded to cuda-11.0 although CUDA
# defaults to 11.2 - presumably for the +cuda11.0 libnvinfer builds; confirm.
ENV LD_LIBRARY_PATH=/usr/local/cuda-11.0/targets/x86_64-linux/lib:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
# Link the libcuda stub to the location where tensorflow is searching for it and reconfigure
# dynamic linker run-time bindings
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \
    && echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/z-cuda-stubs.conf \
    && ldconfig
# [HOML3] Tweaked for handson-ml3: removed Python3 & TensorFlow installation using pip
#################################################
#### End of tensorflow/tensorflow:latest-gpu ####
#################################################
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
# Put conda's own bin directory and the homl3 environment's bin directory
# ahead of the existing PATH (key=value form; the space-separated ENV form
# is deprecated).
ENV PATH=/opt/conda/bin:/opt/conda/envs/homl3/bin:$PATH
# Next we need to install miniconda
############################################
#### FROM continuumio/miniconda3:latest ####
############################################
# [HOML3] Tweaked for handson-ml3: removed the beginning of the Dockerfile
CMD [ "/bin/bash" ]
# Leave these args here to better use the Docker build cache
ARG CONDA_VERSION=py39_4.12.0
# Download the Miniconda installer for the build machine's architecture
# (uname -m), verify it against the hard-coded SHA-256 unless
# CONDA_VERSION=latest, and install it into /opt/conda. Then link conda.sh
# into /etc/profile.d, make the build user's ~/.bashrc activate the base
# environment, and delete static libraries, JS source maps and conda caches
# in the same layer.
# The checksums are fixed values: overriding CONDA_VERSION with another pinned
# release requires updating them as well.
# An unsupported architecture now aborts with an explicit message instead of
# running wget with an empty URL.
RUN set -x && \
    UNAME_M="$(uname -m)" && \
    if [ "${UNAME_M}" = "x86_64" ]; then \
        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \
        SHA256SUM="78f39f9bae971ec1ae7969f0516017f2413f17796670f7040725dd83fcff5689"; \
    elif [ "${UNAME_M}" = "s390x" ]; then \
        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \
        SHA256SUM="ff6fdad3068ab5b15939c6f422ac329fa005d56ee0876c985e22e622d930e424"; \
    elif [ "${UNAME_M}" = "aarch64" ]; then \
        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \
        SHA256SUM="5f4f865812101fdc747cea5b820806f678bb50fe0a61f19dc8aa369c52c4e513"; \
    elif [ "${UNAME_M}" = "ppc64le" ]; then \
        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \
        SHA256SUM="1fe3305d0ccc9e55b336b051ae12d82f33af408af4b560625674fa7ad915102b"; \
    else \
        echo "Unsupported architecture for Miniconda: ${UNAME_M}" >&2; \
        exit 1; \
    fi && \
    wget "${MINICONDA_URL}" -O miniconda.sh -q && \
    # Canonical checksum-file layout: hash, two spaces, file name.
    echo "${SHA256SUM}  miniconda.sh" > shasum && \
    if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \
    mkdir -p /opt && \
    sh miniconda.sh -b -p /opt/conda && \
    rm miniconda.sh shasum && \
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc && \
    find /opt/conda/ -follow -type f -name '*.a' -delete && \
    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
    /opt/conda/bin/conda clean -afy
##############################################
#### End of continuumio/miniconda3:latest ####
##############################################
# With conda in place, build the environment described by environment.yml.
# The same layer then purges conda's caches, deletes static libraries (*.a),
# compiled bytecode (*.pyc) and JS source maps (*.js.map) under /opt/conda,
# and removes the temporary copy of environment.yml.
COPY environment.yml /tmp/
RUN conda env create -f /tmp/environment.yml && \
    conda clean -afy && \
    find /opt/conda/ -follow -type f \
        \( -name '*.a' -o -name '*.pyc' -o -name '*.js.map' \) -delete && \
    rm /tmp/environment.yml
# Identity of the unprivileged user the image runs as. username and userid
# have no defaults and must be passed with --build-arg; home and workdir are
# derived from them unless overridden.
ARG username
ARG userid
ARG home=/home/${username}
ARG workdir=${home}/handson-ml3
# Create the user (no password) and grant it passwordless sudo.
# The ${var:?} guards abort the build with a clear message when a required
# build argument is missing, instead of letting adduser fail obscurely.
RUN : "${username:?build arg 'username' is required}" "${userid:?build arg 'userid' is required}" \
    && adduser "${username}" --uid "${userid}" --gecos '' --disabled-password \
    && echo "${username} ALL=(root) NOPASSWD:ALL" > "/etc/sudoers.d/${username}" \
    && chmod 0440 "/etc/sudoers.d/${username}"
# WORKDIR creates the directory while the build still runs as the previous
# user, so ownership is handed to the new user before switching to it.
# WORKDIR stays in effect after USER, so it is set only once.
WORKDIR ${workdir}
RUN chown "${username}:${username}" "${workdir}"
USER ${username}
# Register nbdiff as the notebook diff driver in the user's global git config.
# This makes "git diff" notebook-aware once you are inside the container
# (via "make exec" or "docker-compose exec handson-ml3 bash").
# To compare a notebook's checkpointed version with its current version, run:
#   nbdiff NOTEBOOK_NAME.ipynb
RUN git-nbdiffdriver config --enable --global
# INFO: The two RUN commands below are alternatives that make nbdiff (within
# git diff) ignore either metadata or details; enable whichever one you want.
#RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-metadata'
RUN git config --global diff.jupyternotebook.command "git-nbdiffdriver diff --ignore-details"
# Append the project's bashrc.bash to the user's .bashrc and put
# ${workdir}/docker/bin first on PATH for shells that source it.
# ${workdir} is a build argument and is expanded now. $PATH is escaped so it
# is resolved when .bashrc is sourced; otherwise the build-time PATH would be
# frozen into the file and overwrite the PATH the container was started with.
# sudo is needed for the cleanup because the build already runs as the
# unprivileged user at this point.
COPY docker/bashrc.bash /tmp/
RUN cat /tmp/bashrc.bash >> "${home}/.bashrc" \
    && echo "export PATH=\"${workdir}/docker/bin:\$PATH\"" >> "${home}/.bashrc" \
    && sudo rm /tmp/bashrc.bash
# INFO: Uncomment lines below to enable automatic save of python-only and html-only
# exports alongside the notebook
#COPY docker/jupyter_notebook_config.py /tmp/
#RUN cat /tmp/jupyter_notebook_config.py >> ${home}/.jupyter/jupyter_notebook_config.py
#RUN sudo rm /tmp/jupyter_notebook_config.py
# INFO: Uncomment the RUN command below to disable git diff paging
#RUN git config --global core.pager ''
# INFO: Uncomment the RUN command below for an easy, constant notebook URL (just localhost:8888).
# That will switch Jupyter to using an empty password instead of a token.
# To avoid opening a security hole, you SHOULD not merely uncomment it: generate the
# hash of your own non-empty password and use it to replace the hash below.
# You can compute a password hash in any notebook, just run the code:
# from notebook.auth import passwd
# passwd()
# and take the hash from the output
#RUN mkdir -p ${home}/.jupyter && \
# echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \
# >> ${home}/.jupyter/jupyter_notebook_config.py