Skip to content

Commit

Permalink
Switch to docker volumes in model generation (#7910)
Browse files Browse the repository at this point in the history
  • Loading branch information
mc-nv authored Jan 2, 2025
1 parent be07557 commit 997bb70
Show file tree
Hide file tree
Showing 3 changed files with 521 additions and 515 deletions.
219 changes: 91 additions & 128 deletions qa/common/gen_jetson_trt_models
Original file line number Diff line number Diff line change
Expand Up @@ -39,153 +39,116 @@ TRITON_VERSION=${TRITON_VERSION:=24.11}
# GPU index used in the fallback "--gpus device=N" argument below. It is taken
# from RUNNER_ID; ":=" also assigns 0 to RUNNER_ID itself when it is unset or
# empty.
CUDA_DEVICE=${RUNNER_ID:=0}
# Set TensorRT image
# (the tag is derived from TRITON_VERSION unless TENSORRT_IMAGE is already set)
TENSORRT_IMAGE=${TENSORRT_IMAGE:=nvcr.io/nvidia/tensorrt:$TRITON_VERSION-py3-igpu}

# Set the path to the host working directory
HOST_BUILD_DIR=${HOST_BUILD_DIR:=/tmp/models_build}
# Set the path to the host model output directory
# (one sub-directory of the working directory per TRITON_VERSION)
HOST_MODEL_DIR=${HOST_MODEL_DIR:="${HOST_BUILD_DIR}/${TRITON_VERSION}"}
# Set the source directory to store executable source file to generate models
HOST_SRCDIR=${HOST_SRCDIR:=$HOST_BUILD_DIR/gen_srcdir}
# Image used for the utility containers that prepare the Docker volume
# (they only run mkdir or serve as a target for `docker cp`).
UBUNTU_IMAGE=${UBUNTU_IMAGE:=ubuntu:24.04}

# Set CI specific parameters
# An already-set DOCKER_GPU_ARGS wins. Otherwise, when RUNNER_GPUS starts with
# a digit, the contents of NV_DOCKER_ARGS are eval'ed as a command and its
# output becomes the value; when RUNNER_GPUS does not match, or that command
# returns non-zero, the fallback "--gpus device=$CUDA_DEVICE" is echoed.
# NOTE(review): with "A && B || C" a failing eval still runs the echo, so any
# partial output of the eval and the fallback can both end up in the value.
DOCKER_GPU_ARGS=${DOCKER_GPU_ARGS:-$([[ $RUNNER_GPUS =~ ^[0-9] ]] && eval $NV_DOCKER_ARGS || echo "--gpus device=$CUDA_DEVICE" )}

# Set model output directories
############################################################################
# Create the Docker volume for this job if it does not exist yet
############################################################################
# CI_JOB_ID falls back to a minute-resolution timestamp, so two local runs
# started within the same minute resolve to the same volume name.
CI_JOB_ID=${CI_JOB_ID:=$(date +%Y%m%d_%H%M)}
# NOTE(review): "repositor" looks like a typo for "repository"; the name is
# left unchanged because existing volumes or jobs may depend on it.
DOCKER_VOLUME=${DOCKER_VOLUME:=volume_gen_qa_model_repositor_${CI_JOB_ID}}
if ! docker volume inspect "${DOCKER_VOLUME}" > /dev/null 2>&1; then
  echo "Docker volume $DOCKER_VOLUME does not exist. Creating..."
  docker volume create "${DOCKER_VOLUME}"
  # Print the metadata of the freshly created volume to the log.
  docker volume inspect "${DOCKER_VOLUME}"
fi

# Host-side output directories; every one sits directly under HOST_MODEL_DIR.
HOST_DESTDIR="${HOST_MODEL_DIR}/qa_model_repository"
HOST_DATADEPENDENTDIR="${HOST_MODEL_DIR}/qa_trt_data_dependent_model_repository"
HOST_DYNASEQDESTDIR="${HOST_MODEL_DIR}/qa_dyna_sequence_model_repository"
HOST_DYNASEQIMPLICITDESTDIR="${HOST_MODEL_DIR}/qa_dyna_sequence_implicit_model_repository"
HOST_FORMATDESTDIR="${HOST_MODEL_DIR}/qa_trt_format_model_repository"
HOST_IDENTITYBIGDESTDIR="${HOST_MODEL_DIR}/qa_identity_big_model_repository"
HOST_IDENTITYDESTDIR="${HOST_MODEL_DIR}/qa_identity_model_repository"
HOST_IMPLICITSEQDESTDIR="${HOST_MODEL_DIR}/qa_sequence_implicit_model_repository"
HOST_RAGGEDDESTDIR="${HOST_MODEL_DIR}/qa_ragged_model_repository"
HOST_RESHAPEDESTDIR="${HOST_MODEL_DIR}/qa_reshape_model_repository"
HOST_SEQDESTDIR="${HOST_MODEL_DIR}/qa_sequence_model_repository"
HOST_SHAPEDESTDIR="${HOST_MODEL_DIR}/qa_shapetensor_model_repository"
HOST_VARDESTDIR="${HOST_MODEL_DIR}/qa_variable_model_repository"
HOST_VARIMPLICITSEQDESTDIR="${HOST_MODEL_DIR}/qa_variable_sequence_implicit_model_repository"
HOST_VARSEQDESTDIR="${HOST_MODEL_DIR}/qa_variable_sequence_model_repository"
# Remove any leftover helper container; it carries the same name as the volume.
docker rm -f "${DOCKER_VOLUME}"
# Create the per-job directory skeleton inside the volume (mounted at /mnt).
docker run --rm -v "${DOCKER_VOLUME}:/mnt" -w "/mnt/${CI_JOB_ID}" "${UBUNTU_IMAGE}" \
  mkdir -p gen_srcdir "${TRITON_VERSION}"
# Create (without starting) a container named after the volume; the
# `docker cp` calls in this script address the volume's files through it.
docker create --name "${DOCKER_VOLUME}" -v "${DOCKER_VOLUME}:/mnt" -w "/mnt/${CI_JOB_ID}" "${UBUNTU_IMAGE}"
# Copy the current directory into the volume's source directory.
# NOTE(review): assumes the script is started from the directory that holds
# the gen_*.py generator scripts - confirm.
docker cp . "${DOCKER_VOLUME}:/mnt/${CI_JOB_ID}/gen_srcdir"

# Clean up host working directory
# Outside CI (CI unset or empty) the whole build directory is removed; in CI
# only the plan* entries one level below it are removed.
# "${HOST_BUILD_DIR:?}" aborts the script when the variable is unset or empty,
# so the CI branch can never expand to "rm -frv /*/plan*".
if [[ -z "${CI:-}" ]]; then
  rm -frv -- "${HOST_BUILD_DIR:?}"
else
  rm -frv -- "${HOST_BUILD_DIR:?}"/*/plan*
fi
# Set model output directories
# These are container-side paths: the containers in this script mount the
# Docker volume at /mnt. Both base paths keep a value that is already set.
: "${VOLUME_BUILD_DIR:=/mnt/${CI_JOB_ID}}"
: "${VOLUME_SRCDIR:=${VOLUME_BUILD_DIR}/gen_srcdir}"

VOLUME_DESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_model_repository"
VOLUME_DATADEPENDENTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_trt_data_dependent_model_repository"
VOLUME_DYNASEQDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_dyna_sequence_model_repository"
VOLUME_DYNASEQIMPLICITDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_dyna_sequence_implicit_model_repository"
VOLUME_FORMATDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_trt_format_model_repository"
VOLUME_IDENTITYBIGDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_identity_big_model_repository"
VOLUME_IDENTITYDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_identity_model_repository"
VOLUME_IMPLICITSEQDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_sequence_implicit_model_repository"
VOLUME_RAGGEDDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_ragged_model_repository"
VOLUME_RESHAPEDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_reshape_model_repository"
VOLUME_SEQDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_sequence_model_repository"
VOLUME_SHAPEDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_shapetensor_model_repository"
VOLUME_VARDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_variable_model_repository"
VOLUME_VARIMPLICITSEQDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_variable_sequence_implicit_model_repository"
VOLUME_VARSEQDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_variable_sequence_model_repository"

# Create the model output directories
# One mkdir call with quoted operands: paths containing whitespace are not
# split, and "--" keeps a path starting with "-" from being read as an option.
mkdir -p -- \
  "${HOST_SRCDIR}" \
  "${HOST_DESTDIR}" \
  "${HOST_DATADEPENDENTDIR}" \
  "${HOST_DYNASEQDESTDIR}" \
  "${HOST_DYNASEQIMPLICITDESTDIR}" \
  "${HOST_FORMATDESTDIR}" \
  "${HOST_IDENTITYBIGDESTDIR}" \
  "${HOST_IDENTITYDESTDIR}" \
  "${HOST_IMPLICITSEQDESTDIR}" \
  "${HOST_RAGGEDDESTDIR}" \
  "${HOST_RESHAPEDESTDIR}" \
  "${HOST_SEQDESTDIR}" \
  "${HOST_SHAPEDESTDIR}" \
  "${HOST_VARDESTDIR}" \
  "${HOST_VARIMPLICITSEQDESTDIR}" \
  "${HOST_VARSEQDESTDIR}"

# Copy the executable source file to the host generate models source directory
# Each file is copied from the current directory; a failed copy does not stop
# the remaining ones. The destination is quoted so a path with whitespace is
# passed to cp as a single argument.
for gen_src in \
  gen_ensemble_model_utils.py \
  gen_common.py \
  gen_qa_dyna_sequence_implicit_models.py \
  gen_qa_dyna_sequence_models.py \
  gen_qa_identity_models.py \
  gen_qa_implicit_models.py \
  gen_qa_models.py \
  gen_qa_noshape_models.py \
  gen_qa_ragged_models.py \
  gen_qa_reshape_models.py \
  gen_qa_sequence_models.py \
  gen_qa_trt_data_dependent_shape.py \
  gen_qa_trt_format_models.py \
  gen_qa_trt_plugin_models.py \
  test_util.py; do
  cp -- "./${gen_src}" "${HOST_SRCDIR}/."
done
# Do not leak the loop variable into the rest of the script.
unset gen_src
# Create the source and model output directories inside the Docker volume
# (runs mkdir in a throw-away container that mounts the volume at /mnt)
docker run --rm -v "${DOCKER_VOLUME}:/mnt" -w "/mnt/${CI_JOB_ID}" "${UBUNTU_IMAGE}" \
  mkdir -p \
    "${VOLUME_SRCDIR}" \
    "${VOLUME_DESTDIR}" \
    "${VOLUME_DATADEPENDENTDIR}" \
    "${VOLUME_DYNASEQDESTDIR}" \
    "${VOLUME_DYNASEQIMPLICITDESTDIR}" \
    "${VOLUME_FORMATDESTDIR}" \
    "${VOLUME_IDENTITYBIGDESTDIR}" \
    "${VOLUME_IDENTITYDESTDIR}" \
    "${VOLUME_IMPLICITSEQDESTDIR}" \
    "${VOLUME_RAGGEDDESTDIR}" \
    "${VOLUME_RESHAPEDESTDIR}" \
    "${VOLUME_SEQDESTDIR}" \
    "${VOLUME_SHAPEDESTDIR}" \
    "${VOLUME_VARDESTDIR}" \
    "${VOLUME_VARIMPLICITSEQDESTDIR}" \
    "${VOLUME_VARSEQDESTDIR}"

# Set TensorRT model generation script name
# (reassigned at the end of this section; the last assignment is the one in effect)
TRT_MODEL_SCRIPT='trt_gen.cmds'

# Set container working directory
CONTAINER_SOURCE_DIR='/workspace/src'
CONTAINER_MODEL_DIR='/tmp/models'
# Container-side output directories; every one sits directly under CONTAINER_MODEL_DIR.
CONTAINER_DESTDIR="${CONTAINER_MODEL_DIR}/qa_model_repository"
CONTAINER_DATADEPENDENTDIR="${CONTAINER_MODEL_DIR}/qa_trt_data_dependent_model_repository"
CONTAINER_DYNASEQDESTDIR="${CONTAINER_MODEL_DIR}/qa_dyna_sequence_model_repository"
CONTAINER_DYNASEQIMPLICITDESTDIR="${CONTAINER_MODEL_DIR}/qa_dyna_sequence_implicit_model_repository"
CONTAINER_FORMATDESTDIR="${CONTAINER_MODEL_DIR}/qa_trt_format_model_repository"
CONTAINER_IDENTITYBIGDESTDIR="${CONTAINER_MODEL_DIR}/qa_identity_big_model_repository"
CONTAINER_IDENTITYDESTDIR="${CONTAINER_MODEL_DIR}/qa_identity_model_repository"
CONTAINER_IMPLICITSEQDESTDIR="${CONTAINER_MODEL_DIR}/qa_sequence_implicit_model_repository"
CONTAINER_RAGGEDDESTDIR="${CONTAINER_MODEL_DIR}/qa_ragged_model_repository"
CONTAINER_RESHAPEDESTDIR="${CONTAINER_MODEL_DIR}/qa_reshape_model_repository"
CONTAINER_SEQDESTDIR="${CONTAINER_MODEL_DIR}/qa_sequence_model_repository"
CONTAINER_SHAPEDESTDIR="${CONTAINER_MODEL_DIR}/qa_shapetensor_model_repository"
CONTAINER_VARDESTDIR="${CONTAINER_MODEL_DIR}/qa_variable_model_repository"
CONTAINER_VARIMPLICITSEQDESTDIR="${CONTAINER_MODEL_DIR}/qa_variable_sequence_implicit_model_repository"
CONTAINER_VARSEQDESTDIR="${CONTAINER_MODEL_DIR}/qa_variable_sequence_model_repository"
TRT_MODEL_SCRIPT='gen.TensorRT.gen_jetson_trt_models.cmds'

# Set script to generate TensorRT models
# The here-document delimiter (EOF) is unquoted, so every $VARIABLE in the body
# is expanded by this script while the file is being written; the generated
# script therefore contains the resolved paths, not variable references.
# Lines starting with '#' inside the body are written to the generated script;
# they are not comments of this file.
cat >$HOST_SRCDIR/$TRT_MODEL_SCRIPT <<EOF
cat > $TRT_MODEL_SCRIPT <<EOF
#!/bin/bash -xe
# Make all generated files accessible outside of container
umask 0000
nvidia-smi -L || true
export TRT_SUPPRESS_DEPRECATION_WARNINGS=1
ldconfig || true
cd $CONTAINER_SOURCE_DIR
cd $VOLUME_SRCDIR
# Models using shape tensor i/o
python3 $CONTAINER_SOURCE_DIR/gen_qa_identity_models.py --tensorrt-shape-io --models_dir=$CONTAINER_SHAPEDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_sequence_models.py --tensorrt-shape-io --models_dir=$CONTAINER_SHAPEDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_dyna_sequence_models.py --tensorrt-shape-io --models_dir=$CONTAINER_SHAPEDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_models.py --tensorrt --models_dir=$CONTAINER_DESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_models.py --tensorrt --variable --models_dir=$CONTAINER_VARDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_identity_models.py --tensorrt --models_dir=$CONTAINER_IDENTITYDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_identity_models.py --tensorrt-big --models_dir=$CONTAINER_IDENTITYBIGDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_reshape_models.py --tensorrt --variable --models_dir=$CONTAINER_RESHAPEDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_sequence_models.py --tensorrt --models_dir=$CONTAINER_SEQDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_implicit_models.py --tensorrt --models_dir=$CONTAINER_IMPLICITSEQDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_implicit_models.py --tensorrt --variable --models_dir=$CONTAINER_VARIMPLICITSEQDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_dyna_sequence_models.py --tensorrt --models_dir=$CONTAINER_DYNASEQDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_sequence_models.py --tensorrt --variable --models_dir=$CONTAINER_VARSEQDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_dyna_sequence_implicit_models.py --tensorrt --models_dir=$CONTAINER_DYNASEQIMPLICITDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_ragged_models.py --tensorrt --models_dir=$CONTAINER_RAGGEDDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_trt_format_models.py --models_dir=$CONTAINER_FORMATDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_trt_data_dependent_shape.py --models_dir=$CONTAINER_DATADEPENDENTDIR
chmod -R 777 $CONTAINER_MODEL_DIR
python3 $VOLUME_SRCDIR/gen_qa_identity_models.py --tensorrt-shape-io --models_dir=$VOLUME_SHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_sequence_models.py --tensorrt-shape-io --models_dir=$VOLUME_SHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_dyna_sequence_models.py --tensorrt-shape-io --models_dir=$VOLUME_SHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_models.py --tensorrt --models_dir=$VOLUME_DESTDIR
python3 $VOLUME_SRCDIR/gen_qa_models.py --tensorrt --variable --models_dir=$VOLUME_VARDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_identity_models.py --tensorrt --models_dir=$VOLUME_IDENTITYDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_identity_models.py --tensorrt-big --models_dir=$VOLUME_IDENTITYBIGDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_reshape_models.py --tensorrt --variable --models_dir=$VOLUME_RESHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_sequence_models.py --tensorrt --models_dir=$VOLUME_SEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_implicit_models.py --tensorrt --models_dir=$VOLUME_IMPLICITSEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_implicit_models.py --tensorrt --variable --models_dir=$VOLUME_VARIMPLICITSEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_dyna_sequence_models.py --tensorrt --models_dir=$VOLUME_DYNASEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_sequence_models.py --tensorrt --variable --models_dir=$VOLUME_VARSEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_dyna_sequence_implicit_models.py --tensorrt --models_dir=$VOLUME_DYNASEQIMPLICITDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_ragged_models.py --tensorrt --models_dir=$VOLUME_RAGGEDDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_trt_format_models.py --models_dir=$VOLUME_FORMATDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR
EOF
# Make the TensorRT model generation script executable
chmod a+x "${HOST_SRCDIR}/${TRT_MODEL_SCRIPT}"

chmod a+x "${TRT_MODEL_SCRIPT}"

# Copy the generated script into the volume's source directory, addressing
# the volume through the container that is named after it.
docker cp "${TRT_MODEL_SCRIPT}" "${DOCKER_VOLUME}:${VOLUME_SRCDIR}"

# Pull the TensorRT image
docker pull "${TENSORRT_IMAGE}"
# Run the TensorRT model generation script inside the TensorRT container
# Every host output directory is bind-mounted onto its container counterpart,
# and the generation script is used as the container entrypoint.
# DOCKER_GPU_ARGS stays unquoted on purpose: its default value
# ("--gpus device=N") must be split into separate arguments.
# shellcheck disable=SC2086
docker run \
  $DOCKER_GPU_ARGS \
  --rm --entrypoint "${CONTAINER_SOURCE_DIR}/${TRT_MODEL_SCRIPT}" \
  --mount "type=bind,source=${HOST_SRCDIR},target=${CONTAINER_SOURCE_DIR}" \
  --mount "type=bind,source=${HOST_DESTDIR},target=${CONTAINER_DESTDIR}" \
  --mount "type=bind,source=${HOST_DATADEPENDENTDIR},target=${CONTAINER_DATADEPENDENTDIR}" \
  --mount "type=bind,source=${HOST_DYNASEQDESTDIR},target=${CONTAINER_DYNASEQDESTDIR}" \
  --mount "type=bind,source=${HOST_DYNASEQIMPLICITDESTDIR},target=${CONTAINER_DYNASEQIMPLICITDESTDIR}" \
  --mount "type=bind,source=${HOST_FORMATDESTDIR},target=${CONTAINER_FORMATDESTDIR}" \
  --mount "type=bind,source=${HOST_IDENTITYBIGDESTDIR},target=${CONTAINER_IDENTITYBIGDESTDIR}" \
  --mount "type=bind,source=${HOST_IDENTITYDESTDIR},target=${CONTAINER_IDENTITYDESTDIR}" \
  --mount "type=bind,source=${HOST_IMPLICITSEQDESTDIR},target=${CONTAINER_IMPLICITSEQDESTDIR}" \
  --mount "type=bind,source=${HOST_RAGGEDDESTDIR},target=${CONTAINER_RAGGEDDESTDIR}" \
  --mount "type=bind,source=${HOST_RESHAPEDESTDIR},target=${CONTAINER_RESHAPEDESTDIR}" \
  --mount "type=bind,source=${HOST_SEQDESTDIR},target=${CONTAINER_SEQDESTDIR}" \
  --mount "type=bind,source=${HOST_SHAPEDESTDIR},target=${CONTAINER_SHAPEDESTDIR}" \
  --mount "type=bind,source=${HOST_VARDESTDIR},target=${CONTAINER_VARDESTDIR}" \
  --mount "type=bind,source=${HOST_VARIMPLICITSEQDESTDIR},target=${CONTAINER_VARIMPLICITSEQDESTDIR}" \
  --mount "type=bind,source=${HOST_VARSEQDESTDIR},target=${CONTAINER_VARSEQDESTDIR}" \
  "${TENSORRT_IMAGE}"

# Run the generation script from the Docker volume (mounted at /mnt) inside
# the TensorRT container; "bash -xe" traces each command and stops at the
# first failure.
# DOCKER_GPU_ARGS stays unquoted on purpose: its default value
# ("--gpus device=N") must be split into separate arguments.
# shellcheck disable=SC2086
docker run $DOCKER_GPU_ARGS \
  --rm -v "${DOCKER_VOLUME}:/mnt" \
  "${TENSORRT_IMAGE}" bash -xe "${VOLUME_SRCDIR}/${TRT_MODEL_SCRIPT}"

# Copy generated models to /tmp/ if not running in CI
# When CI is unset or empty, the version directory is copied out of the volume
# and then the helper container and the volume itself are removed. When CI is
# set, both are left in place.
if [[ -z "${CI:-}" ]]; then
  echo "Copying generated models to /tmp/"
  docker cp "${DOCKER_VOLUME}:${VOLUME_BUILD_DIR}/${TRITON_VERSION}" /tmp/
  echo "Removing Docker volume $DOCKER_VOLUME"
  # The container named after the volume has to go first; a volume that is
  # still referenced by a container cannot be removed.
  docker rm -f "${DOCKER_VOLUME}"
  docker volume rm "${DOCKER_VOLUME}"
fi
Loading

0 comments on commit 997bb70

Please sign in to comment.