Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docker: avoid re-entering XDG aliasing commands #444

Merged
merged 15 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,15 @@ jobs:
- setup_remote_docker # https://circleci.com/docs/2.0/building-docker-images/
- run:
name: build image
command: make docker-maximum-cuda GIT_DEPTH=--single-branch
command: make docker-maximum-cuda GIT_DEPTH=--single-branch DOCKER_PARALLEL=-j4
no_output_timeout: 30m
- run:
name: test image
command: |
mkdir test-results
docker run --rm -v $PWD:/data ocrd/all:maximum-cuda make -C /build/core deps-test test PYTEST_ARGS=--junitxml=/data/test-results/core.xml
- store_test_results:
path: test-results
- when:
# takes too long for 1h1m CircleCI timeout overall
# also, storage is limited...
Expand Down
17 changes: 10 additions & 7 deletions .github/workflows/makedocker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,19 +75,22 @@ jobs:
if: github.event.inputs.upterm-session == 'true'
uses: lhotari/action-upterm@v1
- name: Make Docker image
run: make docker-${{ github.event.inputs.docker-image }} GIT_DEPTH=--single-branch
run: make docker-${{ github.event.inputs.docker-image }} GIT_DEPTH=--single-branch DOCKER_PARALLEL=-j4
- name: Generate ocrd-all-tool.json
# the Docker build will set OCRD_MODULES inside the image, which we can re-use
# regardless of whether we have /build, we can just use the Makefile from outside again
# the Docker build will already have created this internally
# so just extract and store it here
run: |
export OCRD_MODULES=$(docker run --rm ocrd/all:${{ github.event.inputs.docker-image }} bash -c 'echo $OCRD_MODULES')
make ocrd-all-tool.json
wc -l ocrd-all-tool.json
id=$(docker create ocrd/all:${{ github.event.inputs.docker-image }})
docker cp $id:/build/ocrd-all-tool.json .
docker cp $id:/build/ocrd-all-module-dir.json .
wc -l ocrd-all-tool.json ocrd-all-module-dir.json
- name: Upload ocrd-all-tool.json
uses: actions/upload-artifact@v4
with:
name: ${{ github.event.inputs.docker-image }}_ocrd-all-tool.json
path: ./ocrd-all-tool.json
path: |
./ocrd-all-tool.json
./ocrd-all-module-dir.json
# if-no-files-found: error
- name: Login to Docker Hub
if: github.event.inputs.upload-dockerhub == 'true'
Expand Down
64 changes: 34 additions & 30 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ ARG PARALLEL=""

# increase default network timeouts (so builds don't fail because of low bandwidth)
ENV PIP_OPTIONS="--timeout=3000 ${PIP_OPTIONS}"
RUN echo "Acquire::http::Timeout \"3000\";" >> /etc/apt/apt.conf.d/99network
RUN echo "Acquire::https::Timeout \"3000\";" >> /etc/apt/apt.conf.d/99network
RUN echo "Acquire::ftp::Timeout \"3000\";" >> /etc/apt/apt.conf.d/99network
RUN echo "Acquire::http::Timeout \"3000\";" >> /etc/apt/apt.conf.d/99network && \
echo "Acquire::https::Timeout \"3000\";" >> /etc/apt/apt.conf.d/99network && \
echo "Acquire::ftp::Timeout \"3000\";" >> /etc/apt/apt.conf.d/99network

WORKDIR /build

Expand All @@ -104,31 +104,27 @@ RUN apt-get -y update && apt-get install -y apt-utils
# avoid git submodule update (keep at build context)
ENV NO_UPDATE=1

# start a shell script (so we can comment individual steps here)
RUN echo "set -ex" > docker.sh
# run build in one layer/step (to minimise image size)
RUN set -ex && \
# get packages for build
RUN echo "apt-get -y install automake autoconf libtool pkg-config g++" >> docker.sh
apt-get -y install automake autoconf libtool pkg-config g++ && \
# ensure no additional git actions happen after copying the checked out modules
# try to fetch all modules' system requirements
RUN echo "make deps-ubuntu" >> docker.sh
RUN echo "source $VIRTUAL_ENV/bin/activate" >> docker.sh
RUN echo "pip install -U pip setuptools wheel" >> docker.sh
RUN echo "hash -r" >> docker.sh
make deps-ubuntu && \
. $VIRTUAL_ENV/bin/activate && \
pip install -U pip setuptools wheel && \
hash -r && \
# build/install all tools of the requested modules:
RUN echo "make $PARALLEL all" >> docker.sh
# preinstall ocrd-all-tool.json and ocrd-all-module-dir.json
RUN echo "make ocrd-all-tool.json ocrd-all-module-dir.json" >> docker.sh
make $PARALLEL all && \
# remove unneeded automatic deps and clear pkg cache
RUN echo "apt-get -y remove automake autoconf libtool pkg-config g++ && apt-get -y clean" >> docker.sh
apt-get -y remove automake autoconf libtool pkg-config g++ && \
apt-get -y clean && \
# clean-up some temporary files (git repos are also installation targets and must be kept)
RUN echo "make -i clean-tesseract" >> docker.sh
RUN echo "make -i clean-olena" >> docker.sh
RUN echo "rm -fr /.cache" >> docker.sh
# run the script in one layer/step (to minimise image size)
# (and export all variables)
RUN set -a; bash docker.sh
make -i clean-tesseract && \
make -i clean-olena && \
rm -fr /.cache && \
# update ld.so cache for new libs in /usr/local
RUN ldconfig
ldconfig
# check installation
RUN make -j4 check CHECK_HELP=1
RUN if echo $BASE_IMAGE | fgrep -q cuda; then make fix-cuda; fi
Expand All @@ -137,23 +133,31 @@ RUN if echo $BASE_IMAGE | fgrep -q cuda; then make fix-cuda; fi
# to mount for model persistence; with named volumes, the preinstalled models
# will be copied to the host and complemented by downloaded models; tessdata
# is the only problematic module location
RUN mkdir -p $XDG_DATA_HOME/tessdata
RUN mv $XDG_DATA_HOME/tessdata $XDG_CONFIG_HOME/ocrd-tesserocr-recognize
RUN ln -s $XDG_CONFIG_HOME/ocrd-tesserocr-recognize $XDG_DATA_HOME/tessdata
RUN mkdir -p $XDG_DATA_HOME/tessdata; \
# as seen in #394, this must never be repeated
if ! test -d $XDG_CONFIG_HOME/ocrd-tesserocr-recognize; then \
mv -v $XDG_DATA_HOME/tessdata $XDG_CONFIG_HOME/ocrd-tesserocr-recognize && \
ln -vs $XDG_CONFIG_HOME/ocrd-tesserocr-recognize $XDG_DATA_HOME/tessdata; fi

# finally, alias/symlink all ocrd-resources to /models for shorter mount commands
RUN mkdir -p $XDG_CONFIG_HOME
RUN mv $XDG_CONFIG_HOME /models && ln -s /models $XDG_CONFIG_HOME
RUN mkdir -p $XDG_CONFIG_HOME; \
# as seen in #394, this must never be repeated
if ! test -d /models; then \
mv -v $XDG_CONFIG_HOME /models && \
ln -vs /models $XDG_CONFIG_HOME; fi; \
# ensure unprivileged users can download models, too
RUN chmod go+rwx /models
chmod go+rwx /models

# smoke-test resmgr
RUN ocrd resmgr list-installed

# remove (dated) security workaround preventing use of
# ImageMagick's convert on PDF/PS/EPS/XPS:
RUN sed -i 's/rights="none"/rights="read|write"/g' /etc/ImageMagick-6/policy.xml || true
RUN sed -i 's/rights="none"/rights="read|write"/g' /etc/ImageMagick-6/policy.xml; \
# prevent cache resources exhausted errors
RUN sed -i 's/name="disk" value="1GiB"/name="disk" value="8GiB"/g' /etc/ImageMagick-6/policy.xml || true
sed -i 's/name="disk" value="1GiB"/name="disk" value="8GiB"/g' /etc/ImageMagick-6/policy.xml; \
# relax overly restrictive maximum resolution
RUN sed -i '/width\|height/s/value="16KP"/value="64KP"/' /etc/ImageMagick-6/policy.xml || true
sed -i '/width\|height/s/value="16KP"/value="64KP"/' /etc/ImageMagick-6/policy.xml; true

# avoid the default prompt with user name, because we will likely use the host UID without the host's /etc/passwd
# cannot just set ENV, because the internal bashrc will override it anyway
Expand Down
12 changes: 8 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ $(BIN)/ocrd: core
else
CUSTOM_DEPS += python3 imagemagick libgeos-dev
$(BIN)/ocrd: | $(ACTIVATE_VENV)
. $(ACTIVATE_VENV) && $(SEMPIP) pip install $(PIP_OPTIONS_E) ocrd ocrd_network
. $(ACTIVATE_VENV) && $(SEMPIP) pip install $(PIP_OPTIONS_E) ocrd
endif

.PHONY: test-core
Expand Down Expand Up @@ -722,7 +722,11 @@ $(OCRD_EXECUTABLES): | $(BIN)/wheel
$(filter-out $(BIN)/ocrd,$(OCRD_EXECUTABLES)): $(BIN)/ocrd

# At last, we know what all OCRD_EXECUTABLES are:
all: $(OCRD_MODULES) $(OCRD_EXECUTABLES)
# (json targets depend on OCRD_MODULES and OCRD_EXECUTABLES)
all: ocrd-all-tool.json ocrd-all-module-dir.json
. $(ACTIVATE_VENV) && cp -f $^ `python -c "import ocrd; print(ocrd.__path__[0])"`
if test -d $(SUB_VENV_TF1); then . $(SUB_VENV_TF1)/bin/activate && cp -f $^ `python -c "import ocrd; print(ocrd.__path__[0])"`; fi

show:
@echo VIRTUAL_ENV = $(VIRTUAL_ENV)
@echo OCRD_MODULES = $(OCRD_MODULES)
Expand Down Expand Up @@ -843,9 +847,9 @@ dockers: docker-minimum docker-minimum-cuda docker-medium docker-medium-cuda doc
docker-%: PIP_OPTIONS = -e

# Minimum-size selection: use Ocropy binarization, use Tesseract from git
docker-mini%: DOCKER_MODULES := ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_tesserocr ocrd_wrap workflow-configuration ocrd_olahd_client
docker-mini%: DOCKER_MODULES := core ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_tesserocr ocrd_wrap workflow-configuration ocrd_olahd_client
# Medium-size selection: add Olena binarization and Calamari, add evaluation
docker-medi%: DOCKER_MODULES := cor-asv-ann dinglehopper docstruct format-converters nmalign ocrd_calamari ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_keraslm ocrd_olahd_client ocrd_olena ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_segment ocrd_tesserocr ocrd_wrap workflow-configuration
docker-medi%: DOCKER_MODULES := core cor-asv-ann dinglehopper docstruct format-converters nmalign ocrd_calamari ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_keraslm ocrd_olahd_client ocrd_olena ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_segment ocrd_tesserocr ocrd_wrap workflow-configuration
# Maximum-size selection: use all modules
docker-maxi%: DOCKER_MODULES := $(OCRD_MODULES)

Expand Down
2 changes: 1 addition & 1 deletion ocrd_fileformat