Add Dockerfile.gpu for GPU support
parent abd75d7fd7
commit d6c2ba1b8f

@@ -2,12 +2,20 @@ FROM continuumio/miniconda3:latest

RUN apt-get update && apt-get upgrade -y \
 && apt-get install -y \
-    libpq-dev \
     build-essential \
+    cmake \
+    ffmpeg \
     git \
+    libboost-all-dev \
+    libjpeg-dev \
+    libpq-dev \
+    libsdl2-dev swig \
     sudo \
-    cmake zlib1g-dev libjpeg-dev xvfb ffmpeg xorg-dev libboost-all-dev libsdl2-dev swig \
-    unzip zip \
+    unzip \
+    xorg-dev \
+    xvfb \
+    zip \
+    zlib1g-dev \
 && rm -rf /var/lib/apt/lists/*

COPY environment.yml /tmp/

@@ -0,0 +1,195 @@

# This Dockerfile includes sections from tensorflow/tensorflow:latest-gpu's Dockerfile:
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile
# and sections from continuumio/miniconda3:latest's Dockerfile:
# https://github.com/ContinuumIO/docker-images/blob/master/miniconda3/debian/Dockerfile

# First we need CUDA and everything else needed to support GPUs

###############################################
#### FROM tensorflow/tensorflow:latest-gpu ####
###############################################

ARG UBUNTU_VERSION=18.04

ARG ARCH=
ARG CUDA=11.0
FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
# ARCH and CUDA are specified again because the FROM directive resets ARGs
# (but their default value is retained if set previously)
ARG ARCH
ARG CUDA
ARG CUDNN=8.0.4.30-1
ARG CUDNN_MAJOR_VERSION=8
ARG LIB_DIR_PREFIX=x86_64
ARG LIBNVINFER=7.1.3-1
ARG LIBNVINFER_MAJOR_VERSION=7

# Needed for string substitution
SHELL ["/bin/bash", "-c"]
# Pick up some TF dependencies
# [HOML2] Tweaked for handson-ml2: added all the libs before build-essentials
RUN apt-get update -q && apt-get install -q -y --no-install-recommends \
        bzip2 \
        ca-certificates \
        cmake \
        ffmpeg \
        git \
        libboost-all-dev \
        libglib2.0-0 \
        libjpeg-dev \
        libpq-dev \
        libsdl2-dev \
        libsm6 \
        libxext6 \
        libxrender1 \
        mercurial \
        subversion \
        sudo \
        swig \
        wget \
        xorg-dev \
        xvfb \
        zip \
        zlib1g-dev \
        build-essential \
        cuda-command-line-tools-${CUDA/./-} \
        libcublas-${CUDA/./-} \
        cuda-nvrtc-${CUDA/./-} \
        libcufft-${CUDA/./-} \
        libcurand-${CUDA/./-} \
        libcusolver-${CUDA/./-} \
        libcusparse-${CUDA/./-} \
        curl \
        libcudnn8=${CUDNN}+cuda${CUDA} \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libzmq3-dev \
        pkg-config \
        software-properties-common \
        unzip

# Install TensorRT if not building for PowerPC
RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \
        apt-get install -y --no-install-recommends libnvinfer${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda${CUDA} \
        libnvinfer-plugin${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda${CUDA} \
        && apt-get clean \
        && rm -rf /var/lib/apt/lists/*; }

# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH

# Link the libcuda stub to the location where tensorflow is searching for it and reconfigure
# dynamic linker run-time bindings
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \
    && echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/z-cuda-stubs.conf \
    && ldconfig

# [HOML2] Tweaked for handson-ml2: removed Python3 & TensorFlow installation using pip

#################################################
#### End of tensorflow/tensorflow:latest-gpu ####
#################################################

ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH /opt/conda/bin:/opt/conda/envs/tf2/bin:$PATH

# Next we need to install miniconda

############################################
#### FROM continuumio/miniconda3:latest ####
############################################

# [HOML2] Tweaked for handson-ml2: removed the beginning of the Dockerfile
CMD [ "/bin/bash" ]

# Leave these args here to better use the Docker build cache
ARG CONDA_VERSION=py38_4.9.2
ARG CONDA_MD5=122c8c9beb51e124ab32a0fa6426c656

RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh -O miniconda.sh && \
    echo "${CONDA_MD5}  miniconda.sh" > miniconda.md5 && \
    if ! md5sum --status -c miniconda.md5; then exit 1; fi && \
    mkdir -p /opt && \
    sh miniconda.sh -b -p /opt/conda && \
    rm miniconda.sh miniconda.md5 && \
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc && \
    find /opt/conda/ -follow -type f -name '*.a' -delete && \
    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
    /opt/conda/bin/conda clean -afy

##############################################
#### End of continuumio/miniconda3:latest ####
##############################################

# Now we're ready to create our conda environment

COPY environment.yml /tmp/
RUN conda update -y -n base conda \
    && echo ' - pyvirtualdisplay' >> /tmp/environment.yml \
    && conda env create -f /tmp/environment.yml \
    && conda clean -y -t \
    && rm /tmp/environment.yml

ARG username
ARG userid

ARG home=/home/${username}
ARG workdir=${home}/handson-ml2

RUN adduser ${username} --uid ${userid} --gecos '' --disabled-password \
    && echo "${username} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${username} \
    && chmod 0440 /etc/sudoers.d/${username}

WORKDIR ${workdir}
RUN chown ${username}:${username} ${workdir}

USER ${username}
WORKDIR ${workdir}


# The config below enables diffing notebooks with nbdiff (and nbdiff support
# in git diff command) after connecting to the container by "make exec" (or
# "docker-compose exec handson-ml2 bash")
# You may also try running:
#   nbdiff NOTEBOOK_NAME.ipynb
# to get nbdiff between checkpointed version and current version of the
# given notebook.

RUN git-nbdiffdriver config --enable --global

# INFO: Optionally uncomment any (one) of the following RUN commands below to ignore either
#       metadata or details in nbdiff within git diff
#RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-metadata'
RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-details'


COPY docker/bashrc.bash /tmp/
RUN cat /tmp/bashrc.bash >> ${home}/.bashrc \
    && echo "export PATH=\"${workdir}/docker/bin:$PATH\"" >> ${home}/.bashrc \
    && sudo rm /tmp/bashrc.bash


# INFO: Uncomment lines below to enable automatic save of python-only and html-only
#       exports alongside the notebook
#COPY docker/jupyter_notebook_config.py /tmp/
#RUN cat /tmp/jupyter_notebook_config.py >> ${home}/.jupyter/jupyter_notebook_config.py
#RUN sudo rm /tmp/jupyter_notebook_config.py


# INFO: Uncomment the RUN command below to disable git diff paging
#RUN git config --global core.pager ''


# INFO: Uncomment the RUN command below for easy and constant notebook URL (just localhost:8888)
#       That will switch Jupyter to using empty password instead of a token.
#       To avoid making a security hole you SHOULD in fact not only uncomment but
#       regenerate the hash for your own non-empty password and replace the hash below.
#       You can compute a password hash in any notebook, just run the code:
#          from notebook.auth import passwd
#          passwd()
#       and take the hash from the output
#RUN mkdir -p ${home}/.jupyter && \
#    echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \
#    >> ${home}/.jupyter/jupyter_notebook_config.py

@@ -71,3 +71,56 @@ You can see changes you made relative to the version in git using `git diff` whi

You may also try the `nbd NOTEBOOK_NAME.ipynb` command (custom, see the bashrc file) to compare one of your notebooks with its `checkpointed` version.<br/>
To be precise, the output will tell you *what modifications should be re-played on the **manually saved** version of the notebook (located in the `.ipynb_checkpoints` subdirectory) to update it to the **current**, i.e. **auto-saved**, version (given as the command's argument and located in the working directory)*.

## GPU Support on Linux (experimental)

If you're using Linux and you have a TensorFlow-compatible GPU card (an NVIDIA card with Compute Capability ≥ 3.5) that you would like TensorFlow to use inside the Docker container, then you should download and install the latest driver for your card from [nvidia.com](https://www.nvidia.com/Download/index.aspx?lang=en-us). You will also need to install [NVIDIA Docker support](https://github.com/NVIDIA/nvidia-docker): if you are using Docker 19.03 or above, you must install the `nvidia-container-toolkit` package, and for earlier versions, you must install `nvidia-docker2`.
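
Before building anything, it is worth checking that Docker can actually see your GPU. A minimal sanity check (a sketch: the CUDA image tag is just an example, and it assumes the driver and `nvidia-container-toolkit` are installed with Docker 19.03 or above):

```bash
$ docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
```

If this prints the usual `nvidia-smi` table listing your GPU, the container runtime is set up correctly.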

To build the image, edit `docker-compose.yml`, replace the line `dockerfile: ./docker/Dockerfile` with `dockerfile: ./docker/Dockerfile.gpu`, and then run:

```bash
$ cd /path/to/project/handson-ml2/docker
$ docker-compose build
```
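
If you prefer to make that edit from the command line, a one-liner such as the following works (a sketch; it rewrites the `dockerfile:` entry in place, so double-check `docker-compose.yml` afterwards):

```bash
$ cd /path/to/project/handson-ml2/docker
$ sed -i 's|dockerfile: ./docker/Dockerfile|dockerfile: ./docker/Dockerfile.gpu|' docker-compose.yml
```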

To run the image, it depends on your `docker-compose` version. If you have `docker-compose` version 1.28 or above, that's great! You can simply uncomment the `deploy` section in `docker-compose.yml`, and then run:

```bash
$ cd /path/to/project/handson-ml2/docker
$ docker-compose up
[...]
or http://127.0.0.1:8888/?token=[...]
```
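
If you're not sure which `docker-compose` version you have, you can check it quickly (this just prints the version number):

```bash
$ docker-compose version --short
```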

However, if you have an earlier version of `docker-compose`, it's simpler to use `docker run` directly. If you are using Docker 19.03 or above, you can run:

```bash
$ cd /path/to/project/handson-ml2
$ docker run --name handson-ml2 --gpus all -p 8888:8888 -p 6006:6006 --log-opt mode=non-blocking --log-opt max-buffer-size=50m -d -v `pwd`:/home/devel/handson-ml2 handson-ml2 /opt/conda/envs/tf2/bin/jupyter notebook --ip='0.0.0.0' --port=8888 --no-browser
```

If you are using an older version of Docker, then replace `--gpus all` with `--runtime=nvidia`.
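
For example, this is the same command with the runtime flag swapped in (it assumes `nvidia-docker2` is installed, since that package provides the `nvidia` runtime):

```bash
$ cd /path/to/project/handson-ml2
$ docker run --name handson-ml2 --runtime=nvidia -p 8888:8888 -p 6006:6006 --log-opt mode=non-blocking --log-opt max-buffer-size=50m -d -v `pwd`:/home/devel/handson-ml2 handson-ml2 /opt/conda/envs/tf2/bin/jupyter notebook --ip='0.0.0.0' --port=8888 --no-browser
```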

Then, display the container's logs and point your browser to the URL printed on the screen:

```bash
$ docker logs handson-ml2
[I 09:07:10.805 NotebookApp] Writing notebook server cookie secret to /home/devel/.local/share/jupyter/runtime/notebook_cookie_secret
[...]
or http://127.0.0.1:8888/?token=[...]
```

If everything goes well, Jupyter should appear, and if you open a notebook and execute the following code, it should show a GPU device in the list:

```python
import tensorflow as tf

tf.config.list_physical_devices()
```
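
You can also check GPU visibility from the host without opening a notebook, for example by running `nvidia-smi` inside the running container (a sketch, assuming the container is named `handson-ml2` as above):

```bash
$ docker exec handson-ml2 nvidia-smi
```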

Lastly, to stop and destroy the container (but not the image), run:

```bash
$ docker stop handson-ml2
$ docker rm handson-ml2
```
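
If you also want to reclaim the disk space used by the image itself, you can remove it as well (assuming the image is tagged `handson-ml2`, as in the `docker run` example above; with `docker-compose` the image name may differ):

```bash
$ docker rmi handson-ml2
```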

@@ -3,7 +3,7 @@ services:
  handson-ml2:
    build:
      context: ../
-     dockerfile: ./docker/Dockerfile
+     dockerfile: ./docker/Dockerfile #Dockerfile.gpu
      args:
        - username=devel
        - userid=1000

@@ -20,3 +20,8 @@ services:
    volumes:
      - ../:/home/devel/handson-ml2
    command: /opt/conda/envs/tf2/bin/jupyter notebook --ip='0.0.0.0' --port=8888 --no-browser
+   #deploy:
+   #  resources:
+   #    reservations:
+   #      devices:
+   #        - capabilities: [gpu]