-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #41 from LaTeleScop/develop
Single configurable Dockerfile with multi-stage build + external bazel cache
- Loading branch information
Showing
22 changed files
with
488 additions
and
1,446 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.git | ||
python/__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
##### Configurable Dockerfile with multi-stage build - Author: Vincent Delbar | ||
## Mandatory | ||
ARG BASE_IMG | ||
|
||
# ---------------------------------------------------------------------------- | ||
# Init base stage - will be cloned as intermediate build env | ||
FROM $BASE_IMG AS otbtf-base | ||
WORKDIR /tmp | ||
|
||
### System packages | ||
# Package lists (one file for CLI deps, one for GUI deps) are copied from the build context | ||
COPY tools/docker/build-deps-*.txt ./ | ||
ARG DEBIAN_FRONTEND=noninteractive | ||
# CLI | ||
# The list is read through a redirection rather than a pipe, so that a missing or unreadable | ||
# file fails the layer (a pipe would let xargs run "apt-get install" with no package and succeed) | ||
RUN apt-get update -y && apt-get upgrade -y \ | ||
&& xargs apt-get install --no-install-recommends -y < build-deps-cli.txt \ | ||
&& apt-get clean && rm -rf /var/lib/apt/lists/* | ||
# Optional GUI - packages are only installed when the build arg GUI is "true" | ||
ARG GUI=false | ||
RUN if $GUI; then \ | ||
apt-get update -y \ | ||
&& xargs apt-get install --no-install-recommends -y < build-deps-gui.txt \ | ||
&& apt-get clean && rm -rf /var/lib/apt/lists/* ; fi | ||
|
||
### Python3: expose "python" and "pip" commands, then install the pip packages | ||
RUN ln -s /usr/bin/python3 /usr/local/bin/python \ | ||
&& ln -s /usr/bin/pip3 /usr/local/bin/pip | ||
# The NumPy release is set by NUMPY_SPEC: it conflicts with system's gdal dep and may require venv | ||
ARG NUMPY_SPEC="~=1.19" | ||
# First command upgrades pip and installs the build helpers, second one installs keras helpers without their deps | ||
RUN pip install --no-cache-dir --upgrade \ | ||
pip wheel mock six future \ | ||
"numpy${NUMPY_SPEC}" \ | ||
&& pip install --no-cache-dir --no-deps \ | ||
keras_applications keras_preprocessing | ||
|
||
# ---------------------------------------------------------------------------- | ||
# Temporary builder stage - its dangling cache should persist until "docker builder prune" | ||
FROM otbtf-base AS builder | ||
# Ratio of host CPUs used for parallel builds (lower it to avoid OOM errors when building OTB GUI) | ||
ARG CPU_RATIO=1 | ||
|
||
# Source tree for TF and install prefix shared by all builds | ||
RUN mkdir -p \ | ||
/src/tf \ | ||
/opt/otbtf/bin \ | ||
/opt/otbtf/include \ | ||
/opt/otbtf/lib | ||
WORKDIR /src/tf | ||
|
||
# Silence git's detached HEAD advice | ||
RUN git config --global advice.detachedHead false | ||
|
||
### TF | ||
# Git branch or tag to clone | ||
ARG TF=v2.4.1 | ||
# Bazelisk reads .bazelversion and downloads the right bazel binary (latest by default), it is also exposed as "bazel" | ||
RUN wget --quiet --output-document=/opt/otbtf/bin/bazelisk \ | ||
https://github.com/bazelbuild/bazelisk/releases/latest/download/bazelisk-linux-amd64 \ | ||
&& chmod +x /opt/otbtf/bin/bazelisk \ | ||
&& ln -s /opt/otbtf/bin/bazelisk /opt/otbtf/bin/bazel | ||
|
||
# Bazel targets to build: the C++ shared library and the pip package builder | ||
ARG BZL_TARGETS="//tensorflow:libtensorflow_cc.so //tensorflow/tools/pip_package:build_pip_package" | ||
# "--config=opt" will enable 'march=native' (otherwise edit CC_OPT_FLAGS in build-env-tf.sh) | ||
ARG BZL_CONFIGS="--config=nogcp --config=noaws --config=nohdfs --config=opt" | ||
# "--compilation_mode opt" is already enabled by default (see tf repo .bazelrc and configure.py) | ||
# The remote cache URL points to localhost:9090 by default | ||
ARG BZL_OPTIONS="--verbose_failures --remote_cache=http://localhost:9090" | ||
|
||
# Build | ||
# When KEEP_SRC_TF is "true", /src/tf is not removed at the end of the layer | ||
ARG KEEP_SRC_TF=false | ||
COPY tools/docker/build-env-tf.sh ./ | ||
# Single layer: clone the $TF branch/tag, source build-env-tf.sh, configure and build with bazel, | ||
# then build the wheel, install it under /opt/otbtf, copy libtensorflow_cc.so*, link headers and clean up. | ||
# The $BZL_* and $CPU_RATIO variables are inside single quotes, so they are expanded by the inner bash | ||
# (NOTE(review): this relies on build args being available in the RUN environment - confirm) | ||
RUN git clone --single-branch -b $TF https://github.com/tensorflow/tensorflow.git \ | ||
&& cd tensorflow \ | ||
&& export PATH=$PATH:/opt/otbtf/bin \ | ||
&& export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/otbtf/lib \ | ||
&& bash -c '\ | ||
source ../build-env-tf.sh \ | ||
&& ./configure \ | ||
&& export TMP=/tmp/bazel \ | ||
&& BZL_CMD="build $BZL_TARGETS $BZL_CONFIGS $BZL_OPTIONS" \ | ||
&& bazel $BZL_CMD --jobs="HOST_CPUS*$CPU_RATIO" ' \ | ||
# Installation - split here if you want to check files ^ | ||
# (the next line is a disabled RUN: as written, the command chain continues in the same layer) | ||
#RUN cd tensorflow \ | ||
&& ./bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg \ | ||
&& pip3 install --no-cache-dir --prefix=/opt/otbtf /tmp/tensorflow_pkg/tensorflow*.whl \ | ||
&& ln -s /opt/otbtf/lib/python3.* /opt/otbtf/lib/python3 \ | ||
&& cp -P bazel-bin/tensorflow/libtensorflow_cc.so* /opt/otbtf/lib/ \ | ||
&& ln -s $(find /opt/otbtf -type d -wholename "*/site-packages/tensorflow/include") /opt/otbtf/include/tf \ | ||
# The only missing header in the wheel | ||
&& cp tensorflow/cc/saved_model/tag_constants.h /opt/otbtf/include/tf/tensorflow/cc/saved_model/ \ | ||
# Symlink external libs (required for MKL - libiomp5) | ||
&& for f in $(find -L /opt/otbtf/include/tf -wholename "*/external/*/*.so"); do ln -s $f /opt/otbtf/lib/; done \ | ||
# Cleaning | ||
&& rm -rf bazel-* \ | ||
&& ( $KEEP_SRC_TF || rm -rf /src/tf ) \ | ||
&& rm -rf /root/.cache/ /tmp/* | ||
|
||
### OTB | ||
# GUI has to be declared again in this stage; OTB is the git branch or tag to clone | ||
ARG GUI=false | ||
ARG OTB=7.2.0 | ||
|
||
RUN mkdir /src/otb | ||
WORKDIR /src/otb | ||
|
||
# SuperBuild OTB | ||
# CMake flags are read from build-flags-otb.txt, QT/OpenGL related flags are switched ON when GUI is "true" | ||
COPY tools/docker/build-flags-otb.txt ./ | ||
RUN git clone --single-branch -b $OTB https://gitlab.orfeo-toolbox.org/orfeotoolbox/otb.git \ | ||
&& mkdir -p build \ | ||
&& cd build \ | ||
# Set GL/Qt build flags | ||
&& if $GUI; then \ | ||
sed -i -r "s/-DOTB_USE_(QT|OPENGL|GL[UFE][WT])=OFF/-DOTB_USE_\1=ON/" ../build-flags-otb.txt; fi \ | ||
# Possible ENH: superbuild-all-dependencies switch, with separated build-deps-minimal.txt and build-deps-otbcli.txt) | ||
#&& if $OTB_SUPERBUILD_ALL; then sed -i -r "s/-DOTB_USE_SYSTEM_([A-Z0-9]*)=ON/-DOTB_USE_SYSTEM_\1=OFF/"" ../build-flags-otb.txt; fi \ | ||
&& OTB_FLAGS=$(cat "../build-flags-otb.txt") \ | ||
&& cmake ../otb/SuperBuild -DCMAKE_INSTALL_PREFIX=/opt/otbtf $OTB_FLAGS \ | ||
# Job count is cpu_count * CPU_RATIO, clamped to 1 since round() may return 0 on small hosts and make rejects "-j 0" | ||
&& make -j $(python -c "import os; print(max(1, round( os.cpu_count() * $CPU_RATIO )))") | ||
|
||
### OTBTF - copy (without .git/) or clone repository | ||
COPY . /src/otbtf | ||
#RUN git clone https://github.com/remicres/otbtf.git /src/otbtf | ||
# Expose the sources as an OTB remote module | ||
RUN ln -s /src/otbtf /src/otb/otb/Modules/Remote/otbtf | ||
|
||
# Rebuild OTB with module | ||
# When KEEP_SRC_OTB is "true", /src/otb is not removed at the end of the layer | ||
ARG KEEP_SRC_OTB=false | ||
RUN cd /src/otb/build/OTB/build \ | ||
&& export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/otbtf/lib \ | ||
&& export PATH=$PATH:/opt/otbtf/bin \ | ||
&& cmake /src/otb/otb \ | ||
-DCMAKE_INSTALL_PREFIX=/opt/otbtf \ | ||
-DOTB_WRAP_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 \ | ||
-DOTB_USE_TENSORFLOW=ON -DModule_OTBTensorflow=ON \ | ||
-Dtensorflow_include_dir=/opt/otbtf/include/tf \ | ||
# Forcing TF>=2, this Dockerfile hasn't been tested with v1 + missing link for libtensorflow_framework.so in the wheel | ||
-DTENSORFLOW_CC_LIB=/opt/otbtf/lib/libtensorflow_cc.so.2 \ | ||
-DTENSORFLOW_FRAMEWORK_LIB=/opt/otbtf/lib/python3/site-packages/tensorflow/libtensorflow_framework.so.2 \ | ||
# Job count is cpu_count * CPU_RATIO, clamped to 1 since round() may return 0 on small hosts and make rejects "-j 0" | ||
&& make install -j $(python -c "import os; print(max(1, round( os.cpu_count() * $CPU_RATIO )))") \ | ||
# Cleaning | ||
&& ( $GUI || rm -rf /opt/otbtf/bin/otbgui* ) \ | ||
&& ( $KEEP_SRC_OTB || rm -rf /src/otb ) \ | ||
&& rm -rf /root/.cache /tmp/* | ||
|
||
# Symlink executable python files in PATH | ||
RUN for f in /src/otbtf/python/*.py; do if [ -x $f ]; then ln -s $f /opt/otbtf/bin/; fi; done | ||
|
||
# ---------------------------------------------------------------------------- | ||
# Final stage | ||
FROM otbtf-base | ||
# The MAINTAINER instruction is deprecated, the author is recorded as an OCI label instead | ||
LABEL org.opencontainers.image.authors="Remi Cresson <remi.cresson[at]inrae[dot]fr>" | ||
|
||
# Copy files from intermediate stage (install prefix + whatever sources were kept) | ||
COPY --from=builder /opt/otbtf /opt/otbtf | ||
COPY --from=builder /src /src | ||
|
||
# System-wide ENV | ||
ENV PATH="/opt/otbtf/bin:$PATH" | ||
ENV LD_LIBRARY_PATH="/opt/otbtf/lib:$LD_LIBRARY_PATH" | ||
ENV PYTHONPATH="/opt/otbtf/lib/python3/site-packages:/opt/otbtf/lib/otb/python:/src/otbtf/python" | ||
ENV OTB_APPLICATION_PATH="/opt/otbtf/lib/otb/applications" | ||
|
||
# Default user and working directory (bash is the entrypoint when using 'docker create') | ||
RUN useradd --shell /bin/bash --create-home otbuser | ||
WORKDIR /home/otbuser | ||
|
||
# Passwordless admin rights, only granted when the SUDO build arg is "true" | ||
ARG SUDO=true | ||
RUN if $SUDO; then \ | ||
usermod --append --groups sudo otbuser \ | ||
&& echo "otbuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers; \ | ||
fi | ||
|
||
# Give /src/otbtf to otbuser (but you still need 'sudo -i' in order to rebuild TF or OTB) | ||
RUN chown --recursive otbuser:otbuser /src/otbtf | ||
|
||
# Switching user won't prevent ownership problems with volumes if you're not UID 1000 | ||
USER otbuser | ||
# User-only ENV | ||
|
||
# Smoke tests: the build fails if any of these python imports fails | ||
RUN python -c "import tensorflow" | ||
RUN python -c "import otbtf, tricks" | ||
RUN python -c "import otbApplication as otb; otb.Registry.CreateApplication('ImageClassifierFromDeepFeatures')" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 1 addition & 0 deletions
1
python/create_savedmodel_ienco-m3_patchbased.py
100644 → 100755
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 1 addition & 0 deletions
1
python/create_savedmodel_maggiori17_fullyconv.py
100644 → 100755
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
# Build with Docker | ||
Docker build has to be called from the root of the repository (i.e. `docker build .` or `bash tools/docker/multibuild.sh`). | ||
You can build a custom image using `--build-arg` and several config files : | ||
- Ubuntu : `BASE_IMG` should accept any version, for additional packages see [build-deps-cli.txt](build-deps-cli.txt) and [build-deps-gui.txt](build-deps-gui.txt) | ||
- TensorFlow : `TF` arg for the git branch or tag + [build-env-tf.sh](build-env-tf.sh) and BZL_* arguments for the build configuration | ||
- OrfeoToolBox : `OTB` arg for the git branch or tag + [build-flags-otb.txt](build-flags-otb.txt) to edit cmake flags | ||
|
||
### Base images | ||
```bash | ||
UBUNTU=20.04 # or 16.04, 18.04 | ||
CUDA=11.0.3 # or 10.1, 10.2 | ||
CUDNN=8 # or 7 | ||
IMG=ubuntu:$UBUNTU | ||
GPU_IMG=nvidia/cuda:$CUDA-cudnn$CUDNN-devel-ubuntu$UBUNTU | ||
``` | ||
|
||
### Default arguments | ||
```bash | ||
BASE_IMG # mandatory | ||
CPU_RATIO=1 | ||
GUI=false | ||
NUMPY_SPEC="~=1.19" | ||
TF=v2.4.1 | ||
OTB=7.2.0 | ||
BZL_TARGETS="//tensorflow:libtensorflow_cc.so //tensorflow/tools/pip_package:build_pip_package" | ||
BZL_CONFIGS="--config=nogcp --config=noaws --config=nohdfs --config=opt" | ||
BZL_OPTIONS="--verbose_failures --remote_cache=http://localhost:9090" | ||
KEEP_SRC_TF=false | ||
KEEP_SRC_OTB=false | ||
SUDO=true | ||
|
||
# NumPy version requirement : | ||
# TF < 2.4 : "numpy<1.19.0,>=1.16.0" | ||
# TF >= 2.4 : "numpy~=1.19" | ||
``` | ||
|
||
### Bazel remote cache daemon | ||
If you just need to rebuild with different GUI or KEEP_SRC arguments, or maybe a different branch of OTB, the bazel cache will let you rebuild everything except TF, even if the docker cache was purged (after `docker [system|builder] prune`). | ||
In order to recycle the cache, bazel config and TF git tag should be exactly the same, any change in [build-env-tf.sh](build-env-tf.sh) and `--build-arg` (if related to bazel env, cuda, mkl, xla...) may result in a fresh new build. | ||
|
||
Start a cache daemon - here with max 20GB but 12GB should be enough to save 2 TF builds (GPU and CPU): | ||
```bash | ||
mkdir -p $HOME/.cache/bazel-remote | ||
docker run --detach -u 1000:1000 -v $HOME/.cache/bazel-remote:/data -p 9090:8080 buchgr/bazel-remote-cache --max_size=20 | ||
``` | ||
Then just add ` --network='host'` to the docker build command, or connect bazel to a remote server - see 'BZL_OPTIONS'. | ||
Docker's other networking mode is a virtual bridge, but then you'll need to edit the IP address (see 'BZL_OPTIONS'). | ||
|
||
## Build examples | ||
```bash | ||
# Build for CPU using default Dockerfile args (without AWS, HDFS or GCP support) | ||
docker build --network='host' -t otbtf:cpu --build-arg BASE_IMG=ubuntu:20.04 . | ||
|
||
# Clear bazel config var (deactivate default optimizations and unset noaws/nogcp/nohdfs) | ||
docker build --network='host' -t otbtf:cpu --build-arg BASE_IMG=ubuntu:20.04 --build-arg BZL_CONFIGS= . | ||
|
||
# Enable MKL | ||
MKL_CONFIG="--config=nogcp --config=noaws --config=nohdfs --config=opt --config=mkl" | ||
docker build --network='host' -t otbtf:cpu-mkl --build-arg BZL_CONFIGS="$MKL_CONFIG" --build-arg BASE_IMG=ubuntu:20.04 . | ||
|
||
# Build for GPU (if you're building for your system only you should edit CUDA_COMPUTE_CAPABILITIES in build-env-tf.sh) | ||
docker build --network='host' -t otbtf:gpu --build-arg BASE_IMG=nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04 . | ||
|
||
# Build dev with TF and OTB sources (huge image) + set git branches/tags to clone | ||
docker build --network='host' -t otbtf:gpu-dev-full --build-arg BASE_IMG=nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04 \ | ||
--build-arg KEEP_SRC_OTB=true --build-arg KEEP_SRC_TF=true --build-arg TF=nightly --build-arg OTB=develop . | ||
|
||
# Build old release | ||
docker build --network='host' -t otbtf:oldstable-gpu --build-arg BASE_IMG=nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 \ | ||
--build-arg TF=r2.1 --build-arg NUMPY_SPEC="<1.19" \ | ||
--build-arg BZL_OPTIONS="--noincompatible_do_not_split_linking_cmdline --verbose_failures --remote_cache=http://localhost:9090" . | ||
# You could edit the Dockerfile in order to clone an old branch of the repo instead of copying files from the build context | ||
``` | ||
|
||
### Debug build | ||
If you fail to build, you can log into the last layer and check CMake logs. Run `docker images`, find the latest layer ID and run a tmp container (`docker run -it d60496d9612e bash`). | ||
You may also need to split some multi-command layers in the Dockerfile. | ||
If you see OOM errors during SuperBuild you should decrease CPU_RATIO (e.g. 0.75). | ||
|
||
## Container examples | ||
```bash | ||
# Pull GPU image and create a new container with your home directory as volume (requires apt package nvidia-docker2 and CUDA>=11.0) | ||
docker create --gpus=all --volume $HOME:/home/otbuser/volume -it --name otbtf-gpu mdl4eo/otbtf2.1:gpu | ||
|
||
# Run interactive | ||
docker start -i otbtf-gpu | ||
|
||
# Run in background | ||
docker start otbtf-gpu | ||
docker exec otbtf-gpu python -c 'import tensorflow as tf; print(tf.test.is_gpu_available())' | ||
``` | ||
|
||
### Rebuild OTB with more modules | ||
```bash | ||
docker create --gpus=all -it --name otbtf-gpu-dev mdl4eo/otbtf2.1:gpu-dev | ||
docker start -i otbtf-gpu-dev | ||
``` | ||
```bash | ||
# From the container shell: | ||
sudo -i | ||
cd /src/otb/otb/Modules/Remote | ||
git clone https://gitlab.irstea.fr/raffaele.gaetano/otbSelectiveHaralickTextures.git | ||
cd /src/otb/build/OTB/build | ||
cmake -DModule_OTBAppSelectiveHaralickTextures=ON /src/otb/otb && make install -j | ||
``` | ||
|
||
### Container with GUI | ||
```bash | ||
# GUI is disabled by default in order to save space, and because docker xvfb isn't working properly with OpenGL. | ||
# => otbgui seems OK but monteverdi isn't working | ||
docker build --network='host' -t otbtf:cpu-gui --build-arg BASE_IMG=ubuntu:20.04 --build-arg GUI=true . | ||
docker create -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY -it --name otbtf-gui otbtf:cpu-gui | ||
docker start -i otbtf-gui | ||
$ mapla | ||
``` | ||
|
||
### Common errors | ||
Build : | ||
`Error response from daemon: manifest for nvidia/cuda:11.0-cudnn8-devel-ubuntu20.04 not found: manifest unknown: manifest unknown` | ||
=> Image is missing from dockerhub | ||
|
||
Run : | ||
`failed call to cuInit: UNKNOWN ERROR (303) / no NVIDIA GPU device is present: /dev/nvidia0 does not exist` | ||
=> Nvidia driver is missing or disabled, make sure to add ` --gpus=all` to your docker run or create command |
Oops, something went wrong.