Merge pull request #41 from LaTeleScop/develop

Single configurable Dockerfile with multi-stage build + external bazel cache
remicres · Jan 25, 2021 · 702dfe2 · 702dfe2
2 parents 3ecf418 + c2731bc
commit 702dfe2
Show file tree

Hide file tree

Showing 22 changed files with 488 additions and 1,446 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,2 @@
+.git
+python/__pycache__
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,167 @@
+##### Configurable Dockerfile with multi-stage build - Author: Vincent Delbar
+## Mandatory: base image, e.g. --build-arg BASE_IMG=ubuntu:20.04
+ARG BASE_IMG
+
+# ----------------------------------------------------------------------------
+# Init base stage - will be cloned as intermediate build env
+FROM $BASE_IMG AS otbtf-base
+WORKDIR /tmp
+
+### System packages
+COPY tools/docker/build-deps-*.txt ./
+ARG DEBIAN_FRONTEND=noninteractive
+# CLI - the package list is read by redirection so that a missing file aborts the build
+RUN apt-get update -y && apt-get upgrade -y \
+ && xargs apt-get install --no-install-recommends -y < build-deps-cli.txt \
+ && apt-get clean && rm -rf /var/lib/apt/lists/*
+# Optional GUI - extra packages are only installed when GUI=true
+ARG GUI=false
+RUN if $GUI; then \
+      apt-get update -y \
+      && xargs apt-get install --no-install-recommends -y < build-deps-gui.txt \
+      && apt-get clean && rm -rf /var/lib/apt/lists/* ; fi
+
+### Python3 links and pip packages
+RUN ln -s /usr/bin/python3 /usr/local/bin/python && ln -s /usr/bin/pip3 /usr/local/bin/pip
+# NumPy version is conflicting with system's gdal dep and may require venv
+ARG NUMPY_SPEC="~=1.19"
+RUN pip install --no-cache-dir -U pip wheel mock six future "numpy$NUMPY_SPEC" \
+ && pip install --no-cache-dir --no-deps keras_applications keras_preprocessing
+
+# ----------------------------------------------------------------------------
+# Tmp builder stage - dangling cache should persist until "docker builder prune"
+FROM otbtf-base AS builder
+# CPU_RATIO scales the parallel job count (bazel --jobs, make -j); a smaller value may be required to avoid OOM errors when building OTB GUI
+ARG CPU_RATIO=1
+
+RUN mkdir -p /src/tf /opt/otbtf/bin /opt/otbtf/include /opt/otbtf/lib
+WORKDIR /src/tf
+
+RUN git config --global advice.detachedHead false
+
+### TF
+ARG TF=v2.4.1
+# Install bazelisk (will read .bazelversion and download the right bazel binary - latest by default), symlinked as 'bazel' for the build step
+RUN wget -qO /opt/otbtf/bin/bazelisk https://github.com/bazelbuild/bazelisk/releases/latest/download/bazelisk-linux-amd64 \
+ && chmod +x /opt/otbtf/bin/bazelisk \
+ && ln -s /opt/otbtf/bin/bazelisk /opt/otbtf/bin/bazel
+
+ARG BZL_TARGETS="//tensorflow:libtensorflow_cc.so //tensorflow/tools/pip_package:build_pip_package"
+# "--config=opt" will enable 'march=native' (otherwise edit CC_OPT_FLAGS in build-env-tf.sh)
+ARG BZL_CONFIGS="--config=nogcp --config=noaws --config=nohdfs --config=opt"
+# "--compilation_mode opt" is already enabled by default (see tf repo .bazelrc and configure.py)
+ARG BZL_OPTIONS="--verbose_failures --remote_cache=http://localhost:9090"
+
+# Build - clone TF at $TF, configure, run bazel, then install into /opt/otbtf (KEEP_SRC_TF=true skips the removal of /src/tf)
+ARG KEEP_SRC_TF=false
+COPY tools/docker/build-env-tf.sh ./
+RUN git clone --single-branch -b $TF https://github.com/tensorflow/tensorflow.git \
+ && cd tensorflow \
+ && export PATH=$PATH:/opt/otbtf/bin \
+ && export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/otbtf/lib \
+ && bash -c '\
+      source ../build-env-tf.sh \
+      && ./configure \
+      && export TMP=/tmp/bazel \
+      && BZL_CMD="build $BZL_TARGETS $BZL_CONFIGS $BZL_OPTIONS" \
+      && bazel $BZL_CMD --jobs="HOST_CPUS*$CPU_RATIO" ' \
+# Installation - split here if you want to check files  ^
+#RUN cd tensorflow \
+ && ./bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg \
+ && pip3 install --no-cache-dir --prefix=/opt/otbtf /tmp/tensorflow_pkg/tensorflow*.whl \
+ && ln -s /opt/otbtf/lib/python3.* /opt/otbtf/lib/python3 \
+ && cp -P bazel-bin/tensorflow/libtensorflow_cc.so* /opt/otbtf/lib/ \
+ && ln -s $(find /opt/otbtf -type d -wholename "*/site-packages/tensorflow/include") /opt/otbtf/include/tf \
+ # The only missing header in the wheel
+ && cp tensorflow/cc/saved_model/tag_constants.h /opt/otbtf/include/tf/tensorflow/cc/saved_model/ \
+ # Symlink external libs (required for MKL - libiomp5)
+ && for f in $(find -L /opt/otbtf/include/tf -wholename "*/external/*/*.so"); do ln -s $f /opt/otbtf/lib/; done \
+ # Cleaning - remove bazel-* entries, the TF sources (unless KEEP_SRC_TF=true) and caches in this same layer
+ && rm -rf bazel-* \
+ && ( $KEEP_SRC_TF || rm -rf /src/tf ) \
+ && rm -rf /root/.cache/ /tmp/*
+
+### OTB
+ARG GUI=false
+ARG OTB=7.2.0
+
+RUN mkdir /src/otb
+WORKDIR /src/otb
+
+# SuperBuild OTB - parallel jobs = CPU count * CPU_RATIO, clamped to at least 1 (make rejects "-j 0")
+COPY tools/docker/build-flags-otb.txt ./
+RUN git clone --single-branch -b $OTB https://gitlab.orfeo-toolbox.org/orfeotoolbox/otb.git \
+ && mkdir -p build \
+ && cd build \
+ # Set GL/Qt build flags
+ && if $GUI; then \
+      sed -i -r "s/-DOTB_USE_(QT|OPENGL|GL[UFE][WT])=OFF/-DOTB_USE_\1=ON/" ../build-flags-otb.txt; fi \
+ # Possible ENH: superbuild-all-dependencies switch, with separated build-deps-minimal.txt and build-deps-otbcli.txt
+ #&& if $OTB_SUPERBUILD_ALL; then sed -i -r "s/-DOTB_USE_SYSTEM_([A-Z0-9]*)=ON/-DOTB_USE_SYSTEM_\1=OFF/" ../build-flags-otb.txt; fi \
+ && OTB_FLAGS=$(cat "../build-flags-otb.txt") \
+ && cmake ../otb/SuperBuild -DCMAKE_INSTALL_PREFIX=/opt/otbtf $OTB_FLAGS \
+ && make -j $(python -c "import os; print(max(1, round( (os.cpu_count() or 1) * $CPU_RATIO )))")
+
+### OTBTF - copy (without .git/) or clone repository
+COPY . /src/otbtf
+#RUN git clone https://github.com/remicres/otbtf.git /src/otbtf
+RUN ln -s /src/otbtf /src/otb/otb/Modules/Remote/otbtf
+
+# Rebuild OTB with module - parallel jobs = CPU count * CPU_RATIO, clamped to at least 1 (make rejects "-j 0")
+ARG KEEP_SRC_OTB=false
+RUN cd /src/otb/build/OTB/build \
+ && export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/otbtf/lib \
+ && export PATH=$PATH:/opt/otbtf/bin \
+ && cmake /src/otb/otb \
+      -DCMAKE_INSTALL_PREFIX=/opt/otbtf \
+      -DOTB_WRAP_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 \
+      -DOTB_USE_TENSORFLOW=ON -DModule_OTBTensorflow=ON \
+      -Dtensorflow_include_dir=/opt/otbtf/include/tf \
+      # Forcing TF>=2, this Dockerfile hasn't been tested with v1 + missing link for libtensorflow_framework.so in the wheel
+      -DTENSORFLOW_CC_LIB=/opt/otbtf/lib/libtensorflow_cc.so.2 \
+      -DTENSORFLOW_FRAMEWORK_LIB=/opt/otbtf/lib/python3/site-packages/tensorflow/libtensorflow_framework.so.2 \
+ && make install -j $(python -c "import os; print(max(1, round( (os.cpu_count() or 1) * $CPU_RATIO )))") \
+ # Cleaning - GUI launchers are dropped unless GUI=true, OTB sources unless KEEP_SRC_OTB=true
+ && ( $GUI || rm -rf /opt/otbtf/bin/otbgui* ) \
+ && ( $KEEP_SRC_OTB || rm -rf /src/otb ) \
+ && rm -rf /root/.cache /tmp/*
+
+# Symlink executable python files in PATH (paths are quoted so unusual file names cannot split into several words)
+RUN for f in /src/otbtf/python/*.py; do if [ -x "$f" ]; then ln -s "$f" /opt/otbtf/bin/; fi; done
+
+# ----------------------------------------------------------------------------
+# Final stage
+FROM otbtf-base
+LABEL maintainer="Remi Cresson <remi.cresson[at]inrae[dot]fr>"
+
+# Copy files from intermediate stage
+COPY --from=builder /opt/otbtf /opt/otbtf
+COPY --from=builder /src /src
+
+# System-wide ENV (the previous LD_LIBRARY_PATH is appended only when set, to avoid an empty trailing entry)
+ENV PATH="/opt/otbtf/bin:$PATH"
+ENV LD_LIBRARY_PATH="/opt/otbtf/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+ENV PYTHONPATH="/opt/otbtf/lib/python3/site-packages:/opt/otbtf/lib/otb/python:/src/otbtf/python"
+ENV OTB_APPLICATION_PATH="/opt/otbtf/lib/otb/applications"
+
+# Default user, directory and command (bash is the entrypoint when using 'docker create')
+RUN useradd -s /bin/bash -m otbuser
+WORKDIR /home/otbuser
+
+# Admin rights without password (disable with --build-arg SUDO=false)
+ARG SUDO=true
+RUN if $SUDO; then \
+      usermod -a -G sudo otbuser \
+      && echo "otbuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers; fi
+
+# Set /src/otbtf ownership to otbuser (but you still need 'sudo -i' in order to rebuild TF or OTB)
+RUN chown -R otbuser:otbuser /src/otbtf
+
+# This won't prevent ownership problems with volumes if you're not UID 1000
+USER otbuser
+# User-only ENV
+
+# Test python imports as otbuser - the build fails here if TF, OTBTF or the OTB module is not importable
+RUN python -c "import tensorflow"
+RUN python -c "import otbtf, tricks"
+RUN python -c "import otbApplication as otb; otb.Registry.CreateApplication('ImageClassifierFromDeepFeatures')"
diff --git a/include/otbTensorflowGraphOperations.cxx b/include/otbTensorflowGraphOperations.cxx
@@ -16,10 +16,10 @@ namespace tf {
 //
 // Restore a model from a path
 //
-void RestoreModel(const std::string path, tensorflow::SavedModelBundle & bundle)
+void RestoreModel(const tensorflow::tstring path, tensorflow::SavedModelBundle & bundle)
 {
   tensorflow::Tensor checkpointPathTensor(tensorflow::DT_STRING, tensorflow::TensorShape());
-  checkpointPathTensor.scalar<std::string>()() = path;
+  checkpointPathTensor.scalar<tensorflow::tstring>()() = path;
   std::vector<std::pair<std::string, tensorflow::Tensor>> feed_dict =
   {{bundle.meta_graph_def.saver_def().filename_tensor_name(), checkpointPathTensor}};
   auto status = bundle.session->Run(feed_dict, {}, {bundle.meta_graph_def.saver_def().restore_op_name()}, nullptr);
@@ -32,10 +32,10 @@ void RestoreModel(const std::string path, tensorflow::SavedModelBundle & bundle)
 //
 // Restore a model from a path
 //
-void SaveModel(const std::string path, tensorflow::SavedModelBundle & bundle)
+void SaveModel(const tensorflow::tstring path, tensorflow::SavedModelBundle & bundle)
 {
   tensorflow::Tensor checkpointPathTensor(tensorflow::DT_STRING, tensorflow::TensorShape());
-  checkpointPathTensor.scalar<std::string>()() = path;
+  checkpointPathTensor.scalar<tensorflow::tstring>()() = path;
   std::vector<std::pair<std::string, tensorflow::Tensor>> feed_dict =
   {{bundle.meta_graph_def.saver_def().filename_tensor_name(), checkpointPathTensor}};
   auto status = bundle.session->Run(feed_dict, {}, {bundle.meta_graph_def.saver_def().save_tensor_name()}, nullptr);
@@ -48,7 +48,7 @@ void SaveModel(const std::string path, tensorflow::SavedModelBundle & bundle)
 //
 // Load a session and a graph from a folder
 //
-void LoadModel(const std::string path, tensorflow::SavedModelBundle & bundle)
+void LoadModel(const tensorflow::tstring path, tensorflow::SavedModelBundle & bundle)
 {
 
   tensorflow::RunOptions runoptions;

diff --git a/include/otbTensorflowGraphOperations.h b/include/otbTensorflowGraphOperations.h
@@ -27,13 +27,13 @@ namespace otb {
 namespace tf {
 
 // Restore a model from a path
-void RestoreModel(const std::string path, tensorflow::SavedModelBundle & bundle);
+void RestoreModel(const tensorflow::tstring path, tensorflow::SavedModelBundle & bundle);
 
 // Restore a model from a path
-void SaveModel(const std::string path, tensorflow::SavedModelBundle & bundle);
+void SaveModel(const tensorflow::tstring path, tensorflow::SavedModelBundle & bundle);
 
 // Load a session and a graph from a folder
-void LoadModel(const std::string path, tensorflow::SavedModelBundle & bundle);
+void LoadModel(const tensorflow::tstring path, tensorflow::SavedModelBundle & bundle);
 
 // Load a graph from a .meta file
 tensorflow::GraphDef LoadGraph(std::string filename);

diff --git a/python/ckpt2savedmodel.py b/python/ckpt2savedmodel.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # ==========================================================================
 #

diff --git a/python/create_savedmodel_ienco-m3_patchbased.py b/python/create_savedmodel_ienco-m3_patchbased.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # ==========================================================================
 #

diff --git a/python/create_savedmodel_maggiori17_fullyconv.py b/python/create_savedmodel_maggiori17_fullyconv.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 #==========================================================================
 #

diff --git a/python/create_savedmodel_pxs_fcn.py b/python/create_savedmodel_pxs_fcn.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # ==========================================================================
 #

diff --git a/python/create_savedmodel_simple_cnn.py b/python/create_savedmodel_simple_cnn.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # ==========================================================================
 #

diff --git a/python/create_savedmodel_simple_fcn.py b/python/create_savedmodel_simple_fcn.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # ==========================================================================
 #

diff --git a/tools/docker/README.md b/tools/docker/README.md
@@ -0,0 +1,124 @@
+# Build with Docker
+Docker build has to be called from the root of the repository (i.e. `docker build .` or `bash tools/docker/multibuild.sh`).  
+You can build a custom image using `--build-arg` and several config files :
+- Ubuntu : `BASE_IMG` should accept any version, for additional packages see [build-deps-cli.txt](build-deps-cli.txt) and [build-deps-gui.txt](build-deps-gui.txt)
+- TensorFlow : `TF` arg for the git branch or tag + [build-env-tf.sh](build-env-tf.sh) and BZL_* arguments for the build configuration
+- OrfeoToolBox : `OTB` arg for the git branch or tag + [build-flags-otb.txt](build-flags-otb.txt) to edit cmake flags
+
+### Base images
+```bash
+UBUNTU=20.04            # or 16.04, 18.04
+CUDA=11.0.3             # or 10.1, 10.2
+CUDNN=8                 # or 7
+IMG=ubuntu:$UBUNTU
+GPU_IMG=nvidia/cuda:$CUDA-cudnn$CUDNN-devel-ubuntu$UBUNTU
+```
+
+### Default arguments
+```bash
+BASE_IMG                # mandatory
+CPU_RATIO=1
+GUI=false
+NUMPY_SPEC="~=1.19"
+TF=v2.4.1
+OTB=7.2.0
+BZL_TARGETS="//tensorflow:libtensorflow_cc.so //tensorflow/tools/pip_package:build_pip_package"
+BZL_CONFIGS="--config=nogcp --config=noaws --config=nohdfs --config=opt"
+BZL_OPTIONS="--verbose_failures --remote_cache=http://localhost:9090"
+KEEP_SRC_TF=false
+KEEP_SRC_OTB=false
+SUDO=true
+
+# NumPy version requirement :
+# TF <  2.4 : "numpy<1.19.0,>=1.16.0"
+# TF >= 2.4 : "numpy~=1.19"
+```
+
+### Bazel remote cache daemon
+If you just need to rebuild with different GUI or KEEP_SRC arguments, or maybe a different branch of OTB, the bazel cache will help you rebuild everything except TF, even if the docker cache was purged (after `docker [system|builder] prune`).  
+In order to recycle the cache, bazel config and TF git tag should be exactly the same, any change in [build-env-tf.sh](build-env-tf.sh) and `--build-arg` (if related to bazel env, cuda, mkl, xla...) may result in a fresh new build.  
+
+Start a cache daemon - here with max 20GB but 12GB should be enough to save 2 TF builds (GPU and CPU):  
+```bash
+mkdir -p $HOME/.cache/bazel-remote
+docker run --detach -u 1000:1000 -v $HOME/.cache/bazel-remote:/data -p 9090:8080 buchgr/bazel-remote-cache --max_size=20
+```
+Then just add ` --network='host'` to the docker build command, or connect bazel to a remote server - see 'BZL_OPTIONS'.  
+Alternatively, use Docker's default virtual bridge network, but you'll then need to replace `localhost` with the bridge IP address in 'BZL_OPTIONS'.  
+
+## Build examples
+```bash
+# Build for CPU using default Dockerfiles args (without AWS, HDFS or GCP support)
+docker build --network='host' -t otbtf:cpu --build-arg BASE_IMG=ubuntu:20.04 .
+
+# Clear bazel config var (deactivate default optimizations and unset noaws/nogcp/nohdfs)
+docker build --network='host' -t otbtf:cpu --build-arg BASE_IMG=ubuntu:20.04 --build-arg BZL_CONFIGS= .
+
+# Enable MKL
+MKL_CONFIG="--config=nogcp --config=noaws --config=nohdfs --config=opt --config=mkl"
+docker build --network='host' -t otbtf:cpu-mkl --build-arg BZL_CONFIGS="$MKL_CONFIG" --build-arg BASE_IMG=ubuntu:20.04 .
+
+# Build for GPU (if you're building for your system only you should edit CUDA_COMPUTE_CAPABILITIES in build-env-tf.sh)
+docker build --network='host' -t otbtf:gpu --build-arg BASE_IMG=nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04 .
+
+# Build dev with TF and OTB sources (huge image) + set git branches/tags to clone
+docker build --network='host' -t otbtf:gpu-dev-full --build-arg BASE_IMG=nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04 \
+    --build-arg KEEP_SRC_OTB=true --build-arg KEEP_SRC_TF=true --build-arg TF=nightly --build-arg OTB=develop .
+
+# Build old release
+docker build --network='host' -t otbtf:oldstable-gpu --build-arg BASE_IMG=nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 \
+    --build-arg TF=r2.1 --build-arg NUMPY_SPEC="<1.19" \
+    --build-arg BZL_OPTIONS="--noincompatible_do_not_split_linking_cmdline --verbose_failures --remote_cache=http://localhost:9090" .
+# You could edit the Dockerfile in order to clone an old branch of the repo instead of copying files from the build context
+```
+
+### Debug build
+If you fail to build, you can log into the last layer and check CMake logs. Run `docker images`, find the latest layer ID and run a tmp container (`docker run -it d60496d9612e bash`).  
+You may also need to split some multi-command layers in the Dockerfile.  
+If you see OOM errors during SuperBuild you should decrease CPU_RATIO (e.g. 0.75).  
+
+## Container examples
+```bash
+# Pull GPU image and create a new container with your home directory as volume (requires apt package nvidia-docker2 and CUDA>=11.0)
+docker create --gpus=all --volume $HOME:/home/otbuser/volume -it --name otbtf-gpu mdl4eo/otbtf2.1:gpu
+
+# Run interactive
+docker start -i otbtf-gpu
+
+# Run in background
+docker start otbtf-gpu
+docker exec otbtf-gpu python -c 'import tensorflow as tf; print(tf.test.is_gpu_available())'
+```
+
+### Rebuild OTB with more modules
+```bash
+docker create --gpus=all -it --name otbtf-gpu-dev mdl4eo/otbtf2.1:gpu-dev
+docker start -i otbtf-gpu-dev
+```
+```bash
+# From the container shell:
+sudo -i
+cd /src/otb/otb/Modules/Remote
+git clone https://gitlab.irstea.fr/raffaele.gaetano/otbSelectiveHaralickTextures.git
+cd /src/otb/build/OTB/build
+cmake -DModule_OTBAppSelectiveHaralickTextures=ON /src/otb/otb && make install -j
+```
+
+### Container with GUI
+```bash
+# GUI is disabled by default in order to save space, and because docker xvfb isn't working properly with OpenGL.
+# => otbgui seems OK but monteverdi isn't working
+docker build --network='host' -t otbtf:cpu-gui --build-arg BASE_IMG=ubuntu:20.04 --build-arg GUI=true .
+docker create -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY -it --name otbtf-gui otbtf:cpu-gui
+docker start -i otbtf-gui
+$ mapla
+```
+
+### Common errors
+Build :  
+`Error response from daemon: manifest for nvidia/cuda:11.0-cudnn8-devel-ubuntu20.04 not found: manifest unknown: manifest unknown`  
+=> Image is missing from dockerhub
+
+Run :  
+`failed call to cuInit: UNKNOWN ERROR (303) / no NVIDIA GPU device is present: /dev/nvidia0 does not exist`  
+=> Nvidia driver is missing or disabled, make sure to add ` --gpus=all` to your docker run or create command