Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to docker volumes in model generation #7910

Merged
merged 26 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
219 changes: 91 additions & 128 deletions qa/common/gen_jetson_trt_models
Original file line number Diff line number Diff line change
Expand Up @@ -39,153 +39,116 @@ TRITON_VERSION=${TRITON_VERSION:=24.11}
# GPU index used in the default Docker GPU arguments below. RUNNER_ID itself is
# defaulted to 0 when it is unset or empty.
CUDA_DEVICE="${RUNNER_ID:=0}"

# TensorRT container image; the tag is derived from TRITON_VERSION.
TENSORRT_IMAGE="${TENSORRT_IMAGE:-nvcr.io/nvidia/tensorrt:${TRITON_VERSION}-py3-igpu}"

# Host working directory.
HOST_BUILD_DIR="${HOST_BUILD_DIR:-/tmp/models_build}"
# Host model output directory (one subdirectory per TRITON_VERSION).
HOST_MODEL_DIR="${HOST_MODEL_DIR:-${HOST_BUILD_DIR}/${TRITON_VERSION}}"
# Host directory that holds the model generator sources.
HOST_SRCDIR="${HOST_SRCDIR:-${HOST_BUILD_DIR}/gen_srcdir}"
# Image used for the helper containers that prepare the Docker volume.
UBUNTU_IMAGE="${UBUNTU_IMAGE:-ubuntu:24.04}"

# CI-specific GPU arguments. An explicit DOCKER_GPU_ARGS always wins. Otherwise,
# when RUNNER_GPUS starts with a digit, the output of evaluating NV_DOCKER_ARGS
# is used; if that check or the eval fails, fall back to the single device
# selected by CUDA_DEVICE.
if [[ -z "${DOCKER_GPU_ARGS:-}" ]]; then
  DOCKER_GPU_ARGS=$([[ $RUNNER_GPUS =~ ^[0-9] ]] && eval $NV_DOCKER_ARGS || echo "--gpus device=$CUDA_DEVICE" )
fi

# Set model output directories
############################################################################
# Ensure the Docker volume used for model generation exists.
#
# CI_JOB_ID defaults to a minute-resolution timestamp and DOCKER_VOLUME to a
# name derived from it; both can be overridden from the environment.
# Expansions passed to docker are quoted so an overridden value containing
# whitespace or glob characters is still passed as a single argument.
############################################################################
CI_JOB_ID=${CI_JOB_ID:=$(date +%Y%m%d_%H%M)}
DOCKER_VOLUME=${DOCKER_VOLUME:=volume_gen_qa_model_repositor_${CI_JOB_ID}}
if ! docker volume inspect "$DOCKER_VOLUME" > /dev/null 2>&1; then
  echo "Docker volume $DOCKER_VOLUME does not exist. Creating..."
  docker volume create "$DOCKER_VOLUME"
  # Print the new volume's metadata to the log.
  docker volume inspect "$DOCKER_VOLUME"
fi

# Host-side model repositories, one per generated model family, all rooted at
# HOST_MODEL_DIR.
HOST_DESTDIR="${HOST_MODEL_DIR}/qa_model_repository"
HOST_DATADEPENDENTDIR="${HOST_MODEL_DIR}/qa_trt_data_dependent_model_repository"
HOST_DYNASEQDESTDIR="${HOST_MODEL_DIR}/qa_dyna_sequence_model_repository"
HOST_DYNASEQIMPLICITDESTDIR="${HOST_MODEL_DIR}/qa_dyna_sequence_implicit_model_repository"
HOST_FORMATDESTDIR="${HOST_MODEL_DIR}/qa_trt_format_model_repository"
HOST_IDENTITYBIGDESTDIR="${HOST_MODEL_DIR}/qa_identity_big_model_repository"
HOST_IDENTITYDESTDIR="${HOST_MODEL_DIR}/qa_identity_model_repository"
HOST_IMPLICITSEQDESTDIR="${HOST_MODEL_DIR}/qa_sequence_implicit_model_repository"
HOST_RAGGEDDESTDIR="${HOST_MODEL_DIR}/qa_ragged_model_repository"
HOST_RESHAPEDESTDIR="${HOST_MODEL_DIR}/qa_reshape_model_repository"
HOST_SEQDESTDIR="${HOST_MODEL_DIR}/qa_sequence_model_repository"
HOST_SHAPEDESTDIR="${HOST_MODEL_DIR}/qa_shapetensor_model_repository"
HOST_VARDESTDIR="${HOST_MODEL_DIR}/qa_variable_model_repository"
HOST_VARIMPLICITSEQDESTDIR="${HOST_MODEL_DIR}/qa_variable_sequence_implicit_model_repository"
HOST_VARSEQDESTDIR="${HOST_MODEL_DIR}/qa_variable_sequence_model_repository"
# Remove any container that already uses the volume's name, so the
# `docker create --name` below cannot collide with it.
docker rm -f $DOCKER_VOLUME

# Create gen_srcdir and the per-version output directory inside the volume.
# The paths are relative to the working directory /mnt/$CI_JOB_ID set by -w.
docker run --rm \
  -v $DOCKER_VOLUME:/mnt \
  -w /mnt/$CI_JOB_ID \
  $UBUNTU_IMAGE \
  mkdir -p gen_srcdir ${TRITON_VERSION}

# Create (without starting) a container named after the volume, with the
# volume mounted at /mnt; it serves as the endpoint for `docker cp`.
docker create \
  --name $DOCKER_VOLUME \
  -v $DOCKER_VOLUME:/mnt \
  -w /mnt/$CI_JOB_ID \
  $UBUNTU_IMAGE

# Copy the current directory (.) to gen_srcdir in the volume via that container.
docker cp . $DOCKER_VOLUME:/mnt/$CI_JOB_ID/gen_srcdir

# Clean up host working directory.
# Outside CI (CI unset or empty) the whole build directory is removed; in CI
# only the plan* entries one level below it are removed.
# The path is quoted so a directory name with spaces stays one argument, and
# `--` stops option parsing. In the CI branch ${HOST_BUILD_DIR:?} aborts the
# script rather than letting an empty value turn the glob into /*/plan*.
if [ -z "${CI}" ]; then
  rm -frv -- "${HOST_BUILD_DIR}"
else
  rm -frv -- "${HOST_BUILD_DIR:?}"/*/plan*
fi
# Paths inside the Docker volume (mounted at /mnt): the per-job build root and
# the generator source directory. Both may be overridden from the environment.
VOLUME_BUILD_DIR="${VOLUME_BUILD_DIR:-/mnt/${CI_JOB_ID}}"
VOLUME_SRCDIR="${VOLUME_SRCDIR:-${VOLUME_BUILD_DIR}/gen_srcdir}"

# In-volume model repositories, one per generated model family, all rooted at
# VOLUME_BUILD_DIR/TRITON_VERSION.
VOLUME_DESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_model_repository"
VOLUME_DATADEPENDENTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_trt_data_dependent_model_repository"
VOLUME_DYNASEQDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_dyna_sequence_model_repository"
VOLUME_DYNASEQIMPLICITDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_dyna_sequence_implicit_model_repository"
VOLUME_FORMATDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_trt_format_model_repository"
VOLUME_IDENTITYBIGDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_identity_big_model_repository"
VOLUME_IDENTITYDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_identity_model_repository"
VOLUME_IMPLICITSEQDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_sequence_implicit_model_repository"
VOLUME_RAGGEDDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_ragged_model_repository"
VOLUME_RESHAPEDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_reshape_model_repository"
VOLUME_SEQDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_sequence_model_repository"
VOLUME_SHAPEDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_shapetensor_model_repository"
VOLUME_VARDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_variable_model_repository"
VOLUME_VARIMPLICITSEQDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_variable_sequence_implicit_model_repository"
VOLUME_VARSEQDESTDIR="${VOLUME_BUILD_DIR}/${TRITON_VERSION}/qa_variable_sequence_model_repository"

# Create the host source directory and every host model output directory.
# mkdir -p also creates missing parent directories and does not fail when the
# directory already exists.
mkdir -p $HOST_SRCDIR
mkdir -p $HOST_DESTDIR
mkdir -p $HOST_DATADEPENDENTDIR
mkdir -p $HOST_DYNASEQDESTDIR
mkdir -p $HOST_DYNASEQIMPLICITDESTDIR
mkdir -p $HOST_FORMATDESTDIR
mkdir -p $HOST_IDENTITYBIGDESTDIR
mkdir -p $HOST_IDENTITYDESTDIR
mkdir -p $HOST_IMPLICITSEQDESTDIR
mkdir -p $HOST_RAGGEDDESTDIR
mkdir -p $HOST_RESHAPEDESTDIR
mkdir -p $HOST_SEQDESTDIR
mkdir -p $HOST_SHAPEDESTDIR
mkdir -p $HOST_VARDESTDIR
mkdir -p $HOST_VARIMPLICITSEQDESTDIR
mkdir -p $HOST_VARSEQDESTDIR

# Copy the model generator sources into the host source directory.
# The ./ paths are resolved against the current working directory, so the
# script must be run from the directory that contains these files.
cp ./gen_ensemble_model_utils.py $HOST_SRCDIR/.
cp ./gen_common.py $HOST_SRCDIR/.
cp ./gen_qa_dyna_sequence_implicit_models.py $HOST_SRCDIR/.
cp ./gen_qa_dyna_sequence_models.py $HOST_SRCDIR/.
cp ./gen_qa_identity_models.py $HOST_SRCDIR/.
cp ./gen_qa_implicit_models.py $HOST_SRCDIR/.
cp ./gen_qa_models.py $HOST_SRCDIR/.
cp ./gen_qa_noshape_models.py $HOST_SRCDIR/.
cp ./gen_qa_ragged_models.py $HOST_SRCDIR/.
cp ./gen_qa_reshape_models.py $HOST_SRCDIR/.
cp ./gen_qa_sequence_models.py $HOST_SRCDIR/.
cp ./gen_qa_trt_data_dependent_shape.py $HOST_SRCDIR/.
cp ./gen_qa_trt_format_models.py $HOST_SRCDIR/.
cp ./gen_qa_trt_plugin_models.py $HOST_SRCDIR/.
cp ./test_util.py $HOST_SRCDIR/.
# Create the source directory and every model output directory inside the
# Docker volume. mkdir -p runs in a throwaway container (--rm) started from
# $UBUNTU_IMAGE with the volume mounted at /mnt; the VOLUME_* paths are
# expanded by this shell before docker is invoked.
docker run --rm -v $DOCKER_VOLUME:/mnt -w /mnt/$CI_JOB_ID $UBUNTU_IMAGE \
mkdir -p \
$VOLUME_SRCDIR \
$VOLUME_DESTDIR \
$VOLUME_DATADEPENDENTDIR \
$VOLUME_DYNASEQDESTDIR \
$VOLUME_DYNASEQIMPLICITDESTDIR \
$VOLUME_FORMATDESTDIR \
$VOLUME_IDENTITYBIGDESTDIR \
$VOLUME_IDENTITYDESTDIR \
$VOLUME_IMPLICITSEQDESTDIR \
$VOLUME_RAGGEDDESTDIR \
$VOLUME_RESHAPEDESTDIR \
$VOLUME_SEQDESTDIR \
$VOLUME_SHAPEDESTDIR \
$VOLUME_VARDESTDIR \
$VOLUME_VARIMPLICITSEQDESTDIR \
$VOLUME_VARSEQDESTDIR

# Name of the TensorRT model generation script.
TRT_MODEL_SCRIPT="trt_gen.cmds"

# Paths as seen from inside the container: the source directory and the model
# output tree, with one repository per generated model family.
CONTAINER_SOURCE_DIR="/workspace/src"
CONTAINER_MODEL_DIR="/tmp/models"
CONTAINER_DESTDIR="${CONTAINER_MODEL_DIR}/qa_model_repository"
CONTAINER_DATADEPENDENTDIR="${CONTAINER_MODEL_DIR}/qa_trt_data_dependent_model_repository"
CONTAINER_DYNASEQDESTDIR="${CONTAINER_MODEL_DIR}/qa_dyna_sequence_model_repository"
CONTAINER_DYNASEQIMPLICITDESTDIR="${CONTAINER_MODEL_DIR}/qa_dyna_sequence_implicit_model_repository"
CONTAINER_FORMATDESTDIR="${CONTAINER_MODEL_DIR}/qa_trt_format_model_repository"
CONTAINER_IDENTITYBIGDESTDIR="${CONTAINER_MODEL_DIR}/qa_identity_big_model_repository"
CONTAINER_IDENTITYDESTDIR="${CONTAINER_MODEL_DIR}/qa_identity_model_repository"
CONTAINER_IMPLICITSEQDESTDIR="${CONTAINER_MODEL_DIR}/qa_sequence_implicit_model_repository"
CONTAINER_RAGGEDDESTDIR="${CONTAINER_MODEL_DIR}/qa_ragged_model_repository"
CONTAINER_RESHAPEDESTDIR="${CONTAINER_MODEL_DIR}/qa_reshape_model_repository"
CONTAINER_SEQDESTDIR="${CONTAINER_MODEL_DIR}/qa_sequence_model_repository"
CONTAINER_SHAPEDESTDIR="${CONTAINER_MODEL_DIR}/qa_shapetensor_model_repository"
CONTAINER_VARDESTDIR="${CONTAINER_MODEL_DIR}/qa_variable_model_repository"
CONTAINER_VARIMPLICITSEQDESTDIR="${CONTAINER_MODEL_DIR}/qa_variable_sequence_implicit_model_repository"
CONTAINER_VARSEQDESTDIR="${CONTAINER_MODEL_DIR}/qa_variable_sequence_model_repository"
# NOTE(review): TRT_MODEL_SCRIPT is assigned a second time here, so this value
# is the one in effect afterwards. The two assignments look like the old and
# new sides of a diff; confirm which one should remain.
TRT_MODEL_SCRIPT="gen.TensorRT.gen_jetson_trt_models.cmds"

# Write the script that generates the TensorRT models.
# The heredoc delimiter is unquoted, so every $VARIABLE in the body is expanded
# by this shell when the file is written, not when the generated script runs.
# The generated script invokes each gen_qa_*.py generator with its
# --models_dir output directory.
# NOTE(review): two `cat ... <<EOF` openers follow back to back. As written,
# the second one is captured as heredoc text by the first, and the body holds
# both the $CONTAINER_* and the $VOLUME_* command sets. These look like the old
# and new sides of a diff; confirm which opener and command set are intended.
cat >$HOST_SRCDIR/$TRT_MODEL_SCRIPT <<EOF
cat > $TRT_MODEL_SCRIPT <<EOF
#!/bin/bash -xe
# Make all generated files accessible outside of container
umask 0000
nvidia-smi -L || true
export TRT_SUPPRESS_DEPRECATION_WARNINGS=1
ldconfig || true

cd $CONTAINER_SOURCE_DIR
cd $VOLUME_SRCDIR
# Models using shape tensor i/o
python3 $CONTAINER_SOURCE_DIR/gen_qa_identity_models.py --tensorrt-shape-io --models_dir=$CONTAINER_SHAPEDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_sequence_models.py --tensorrt-shape-io --models_dir=$CONTAINER_SHAPEDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_dyna_sequence_models.py --tensorrt-shape-io --models_dir=$CONTAINER_SHAPEDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_models.py --tensorrt --models_dir=$CONTAINER_DESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_models.py --tensorrt --variable --models_dir=$CONTAINER_VARDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_identity_models.py --tensorrt --models_dir=$CONTAINER_IDENTITYDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_identity_models.py --tensorrt-big --models_dir=$CONTAINER_IDENTITYBIGDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_reshape_models.py --tensorrt --variable --models_dir=$CONTAINER_RESHAPEDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_sequence_models.py --tensorrt --models_dir=$CONTAINER_SEQDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_implicit_models.py --tensorrt --models_dir=$CONTAINER_IMPLICITSEQDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_implicit_models.py --tensorrt --variable --models_dir=$CONTAINER_VARIMPLICITSEQDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_dyna_sequence_models.py --tensorrt --models_dir=$CONTAINER_DYNASEQDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_sequence_models.py --tensorrt --variable --models_dir=$CONTAINER_VARSEQDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_dyna_sequence_implicit_models.py --tensorrt --models_dir=$CONTAINER_DYNASEQIMPLICITDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_ragged_models.py --tensorrt --models_dir=$CONTAINER_RAGGEDDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_trt_format_models.py --models_dir=$CONTAINER_FORMATDESTDIR
python3 $CONTAINER_SOURCE_DIR/gen_qa_trt_data_dependent_shape.py --models_dir=$CONTAINER_DATADEPENDENTDIR

chmod -R 777 $CONTAINER_MODEL_DIR
python3 $VOLUME_SRCDIR/gen_qa_identity_models.py --tensorrt-shape-io --models_dir=$VOLUME_SHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_sequence_models.py --tensorrt-shape-io --models_dir=$VOLUME_SHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_dyna_sequence_models.py --tensorrt-shape-io --models_dir=$VOLUME_SHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_models.py --tensorrt --models_dir=$VOLUME_DESTDIR
python3 $VOLUME_SRCDIR/gen_qa_models.py --tensorrt --variable --models_dir=$VOLUME_VARDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_identity_models.py --tensorrt --models_dir=$VOLUME_IDENTITYDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_identity_models.py --tensorrt-big --models_dir=$VOLUME_IDENTITYBIGDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_reshape_models.py --tensorrt --variable --models_dir=$VOLUME_RESHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_sequence_models.py --tensorrt --models_dir=$VOLUME_SEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_implicit_models.py --tensorrt --models_dir=$VOLUME_IMPLICITSEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_implicit_models.py --tensorrt --variable --models_dir=$VOLUME_VARIMPLICITSEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_dyna_sequence_models.py --tensorrt --models_dir=$VOLUME_DYNASEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_sequence_models.py --tensorrt --variable --models_dir=$VOLUME_VARSEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_dyna_sequence_implicit_models.py --tensorrt --models_dir=$VOLUME_DYNASEQIMPLICITDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_ragged_models.py --tensorrt --models_dir=$VOLUME_RAGGEDDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_trt_format_models.py --models_dir=$VOLUME_FORMATDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR

EOF
# Make the TensorRT model generation script in the host source directory
# executable
chmod a+x $HOST_SRCDIR/$TRT_MODEL_SCRIPT
# Pull the TensorRT image (the `docker pull` itself is the last command of
# this section)

# Make the copy of the script in the current directory executable
chmod a+x $TRT_MODEL_SCRIPT

# Copy the script into the volume's source directory, addressing the volume
# through the container named $DOCKER_VOLUME
docker cp $TRT_MODEL_SCRIPT $DOCKER_VOLUME:$VOLUME_SRCDIR

docker pull $TENSORRT_IMAGE
# Run the TensorRT model generation script inside the TensorRT container.
# This invocation uses the script as the container entrypoint and bind-mounts
# the host source directory and each host output directory onto its
# $CONTAINER_* counterpart. --rm removes the container when it exits.
docker run \
$DOCKER_GPU_ARGS \
--rm --entrypoint $CONTAINER_SOURCE_DIR/$TRT_MODEL_SCRIPT \
--mount type=bind,source=$HOST_SRCDIR,target=$CONTAINER_SOURCE_DIR \
--mount type=bind,source=$HOST_DESTDIR,target=$CONTAINER_DESTDIR \
--mount type=bind,source=$HOST_DATADEPENDENTDIR,target=$CONTAINER_DATADEPENDENTDIR \
--mount type=bind,source=$HOST_DYNASEQDESTDIR,target=$CONTAINER_DYNASEQDESTDIR \
--mount type=bind,source=$HOST_DYNASEQIMPLICITDESTDIR,target=$CONTAINER_DYNASEQIMPLICITDESTDIR \
--mount type=bind,source=$HOST_FORMATDESTDIR,target=$CONTAINER_FORMATDESTDIR \
--mount type=bind,source=$HOST_IDENTITYBIGDESTDIR,target=$CONTAINER_IDENTITYBIGDESTDIR \
--mount type=bind,source=$HOST_IDENTITYDESTDIR,target=$CONTAINER_IDENTITYDESTDIR \
--mount type=bind,source=$HOST_IMPLICITSEQDESTDIR,target=$CONTAINER_IMPLICITSEQDESTDIR \
--mount type=bind,source=$HOST_RAGGEDDESTDIR,target=$CONTAINER_RAGGEDDESTDIR \
--mount type=bind,source=$HOST_RESHAPEDESTDIR,target=$CONTAINER_RESHAPEDESTDIR \
--mount type=bind,source=$HOST_SEQDESTDIR,target=$CONTAINER_SEQDESTDIR \
--mount type=bind,source=$HOST_SHAPEDESTDIR,target=$CONTAINER_SHAPEDESTDIR \
--mount type=bind,source=$HOST_VARDESTDIR,target=$CONTAINER_VARDESTDIR \
--mount type=bind,source=$HOST_VARIMPLICITSEQDESTDIR,target=$CONTAINER_VARIMPLICITSEQDESTDIR \
--mount type=bind,source=$HOST_VARSEQDESTDIR,target=$CONTAINER_VARSEQDESTDIR \
$TENSORRT_IMAGE

# Volume-based invocation: mount the Docker volume at /mnt and run the script
# from the volume's source directory with `bash -xe` (trace commands, stop at
# the first failure). No bind mounts or entrypoint override are used here.
docker run $DOCKER_GPU_ARGS \
--rm -v $DOCKER_VOLUME:/mnt \
$TENSORRT_IMAGE bash -xe $VOLUME_SRCDIR/$TRT_MODEL_SCRIPT

# Outside CI (CI unset or empty): copy the generated models for this Triton
# version out of the volume to /tmp/, then remove the helper container named
# $DOCKER_VOLUME and the volume itself. When CI is set, nothing is copied or
# removed here.
# "${CI}" is quoted so the test is a real emptiness check and cannot break on a
# value containing whitespace; the docker arguments are quoted for the same
# reason.
if [ -z "${CI}" ]; then
  echo "Copying generated models to /tmp/"
  docker cp "$DOCKER_VOLUME:$VOLUME_BUILD_DIR/$TRITON_VERSION" /tmp/
  echo "Removing Docker volume $DOCKER_VOLUME"
  docker rm -f "$DOCKER_VOLUME"
  docker volume rm "$DOCKER_VOLUME"
fi
Loading
Loading