diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt b/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt index 255da316041f..26051ea3cd6d 100644 --- a/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt +++ b/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt @@ -18,7 +18,10 @@ # # Dockerfile to run MXNet on Ubuntu 16.04 for CPU -FROM nvidia/cuda:9.0-cudnn7-devel +FROM nvidia/cuda:10.0-cudnn7-devel + +# Avoid interactive package installers. +ENV DEBIAN_FRONTEND noninteractive WORKDIR /work/deps diff --git a/ci/docker/install/tensorrt.sh b/ci/docker/install/tensorrt.sh index 61e73ef9a62f..641a88e5dc91 100755 --- a/ci/docker/install/tensorrt.sh +++ b/ci/docker/install/tensorrt.sh @@ -41,7 +41,7 @@ popd # Install TensorRT echo "TensorRT build enabled. Installing TensorRT." -wget -qO tensorrt.deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0_1-1_amd64.deb +wget -qO tensorrt.deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0_1-1_amd64.deb dpkg -i tensorrt.deb apt-get update apt-get install -y --allow-downgrades libnvinfer-dev diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 39631f9dc7e6..2ddbc84a8841 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -523,7 +523,7 @@ build_ubuntu_gpu_tensorrt() { rm -rf build make \ - DEV=1 \ + DEV=0 \ ENABLE_TESTCOVERAGE=1 \ USE_BLAS=openblas \ USE_CUDA=1 \ diff --git a/src/operator/contrib/nnvm_to_onnx.cc b/src/operator/contrib/nnvm_to_onnx.cc index 902466614c7c..8fba9ab01dca 100644 --- a/src/operator/contrib/nnvm_to_onnx.cc +++ b/src/operator/contrib/nnvm_to_onnx.cc @@ -64,11 +64,20 @@ op::TRTParam ConvertNnvmGraphToOnnx( op::tensorrt::NameToIdx_t trt_input_map; op::tensorrt::InferenceMap_t trt_output_map; + static std::atomic_ulong subgraph_count = { 0 }; + op::TRTParam trt_param; + op::tensorrt::NameToIdx_t trt_input_map; + op::tensorrt::InferenceMap_t trt_output_map; const nnvm::IndexedGraph& ig = g.indexed_graph(); + const auto& storage_types = g.GetAttr("storage_type"); const auto& dtypes = g.GetAttr("dtype"); const auto& shape_inputs = g.GetAttr("shape_inputs"); + // TODO(kellens): At the moment this check always passes no matter the weight dtypes used in your + // graph. We should first iterate over datatypes by name and ensure they're valid types + // (fp16 or fp32) and that they're uniform. Then ensure later conversions set tensor types + // correctly in ONNX. for (auto& e : storage_types) { if (e != mshadow::kFloat32) { LOG(FATAL) << "ONNX converter does not support types other than float32 " @@ -78,8 +87,13 @@ op::TRTParam ConvertNnvmGraphToOnnx( ModelProto model_proto; // Need to determine IR versions and features to support - model_proto.set_ir_version(static_cast(2)); + auto opset_proto = model_proto.add_opset_import(); + opset_proto->set_version(static_cast(8)); + model_proto.set_ir_version(static_cast(3)); + GraphProto* graph_proto = model_proto.mutable_graph(); + auto subgraph_name_id = subgraph_count.fetch_add(1); + graph_proto->set_name("MXNetTRTSubgraph" + std::to_string(subgraph_name_id)); std::unordered_map placeholder_shapes = GetPlaceholderShapes(shape_inputs, ig); @@ -174,6 +188,20 @@ void ConvertConvolution(NodeProto* node_proto, const NodeAttrs& attrs, // const bool no_bias = conv_param.no_bias; const dmlc::optional layout = conv_param.layout; + // dilations + AttributeProto* const dilations = node_proto->add_attribute(); + dilations->set_name("dilations"); + dilations->set_type(AttributeProto::INTS); + for (const dim_t kval : dilate) { + dilations->add_ints(static_cast(kval)); + } + + // group + AttributeProto* const group = node_proto->add_attribute(); + group->set_name("group"); + group->set_type(AttributeProto::INT); + group->set_i(static_cast(num_group)); + // kernel shape AttributeProto* const kernel_shape = node_proto->add_attribute(); kernel_shape->set_name("kernel_shape"); @@ -193,14 +221,6 @@ void ConvertConvolution(NodeProto* node_proto, const NodeAttrs& attrs, pads->add_ints(static_cast(kval)); } - // dilations - AttributeProto* const dilations = node_proto->add_attribute(); - dilations->set_name("dilations"); - dilations->set_type(AttributeProto::INTS); - for (const dim_t kval : dilate) { - dilations->add_ints(static_cast(kval)); - } - // strides AttributeProto* const strides = node_proto->add_attribute(); strides->set_name("strides"); @@ -208,12 +228,6 @@ void ConvertConvolution(NodeProto* node_proto, const NodeAttrs& attrs, for (const dim_t kval : stride) { strides->add_ints(static_cast(kval)); } - - // group - AttributeProto* const group = node_proto->add_attribute(); - group->set_name("group"); - group->set_type(AttributeProto::INT); - group->set_i(static_cast(num_group)); } // end ConvertConvolution void ConvertPooling(NodeProto* node_proto, const NodeAttrs& attrs, @@ -248,8 +262,12 @@ void ConvertPooling(NodeProto* node_proto, const NodeAttrs& attrs, AttributeProto* const pads = node_proto->add_attribute(); pads->set_name("pads"); pads->set_type(AttributeProto::INTS); - for (int kval : pad) { - pads->add_ints(static_cast(kval)); + + // Convert from MXNet symetric pads to ONNX non-symetric by running through padding twice. + for (int i =0; i < 2; i++) { + for (dim_t kval : pad) { + pads->add_ints(static_cast(kval)); + } } // strides @@ -313,11 +331,6 @@ void ConvertFullyConnected(NodeProto* node_proto, const NodeAttrs& attrs, beta->set_type(AttributeProto::FLOAT); beta->set_f(1.0f); - AttributeProto* const broadcast = node_proto->add_attribute(); - broadcast->set_name("broadcast"); - broadcast->set_type(AttributeProto::INT); - broadcast->set_i(1); - AttributeProto* const transA = node_proto->add_attribute(); transA->set_name("transA"); transA->set_type(AttributeProto::INT); @@ -369,11 +382,6 @@ void ConvertBatchNorm(NodeProto* node_proto, const NodeAttrs& attrs, epsilon->set_type(AttributeProto::FLOAT); epsilon->set_f(static_cast(param.eps)); - AttributeProto* const is_test = node_proto->add_attribute(); - is_test->set_name("is_test"); - is_test->set_type(AttributeProto::INT); - is_test->set_i(1); - AttributeProto* const momentum = node_proto->add_attribute(); momentum->set_name("momentum"); momentum->set_type(AttributeProto::FLOAT); @@ -382,31 +390,16 @@ void ConvertBatchNorm(NodeProto* node_proto, const NodeAttrs& attrs, AttributeProto* const spatial = node_proto->add_attribute(); spatial->set_name("spatial"); spatial->set_type(AttributeProto::INT); - spatial->set_i(1); - - AttributeProto* const consumed = node_proto->add_attribute(); - consumed->set_name("consumed_inputs"); - consumed->set_type(AttributeProto::INTS); - - for (int i = 0; i < 5; i++) { - int val = (i < 3) ? 0 : 1; - consumed->add_ints(static_cast(val)); - } + // MXNet computes mean and variance per feature for batchnorm. Enabling spatial mode + // (default in ONNX3) implies running batchnorm on all spatial features so we need to explicitly + // disable this for MXNet's BatchNorm. + spatial->set_i(0); } void ConvertElementwiseAdd(NodeProto* node_proto, const NodeAttrs& /*attrs*/, const nnvm::IndexedGraph& /*ig*/, const array_view& /*inputs*/) { node_proto->set_op_type("Add"); - AttributeProto* const axis = node_proto->add_attribute(); - axis->set_name("axis"); - axis->set_type(AttributeProto::INT); - axis->set_i(1); - - AttributeProto* const broadcast = node_proto->add_attribute(); - broadcast->set_name("broadcast"); - broadcast->set_type(AttributeProto::INT); - broadcast->set_i(0); // 1 } std::unordered_map GetPlaceholderShapes( @@ -459,32 +452,40 @@ void ConvertPlaceholder( void ConvertConstant( GraphProto* const graph_proto, const std::string& node_name, std::unordered_map* const shared_buffer) { - NodeProto* const node_proto = graph_proto->add_node(); - node_proto->set_name(node_name); - node_proto->add_output(node_name); - node_proto->set_op_type("Constant"); + TensorProto* const initializer_proto = graph_proto->add_initializer(); + + // Create initializer for constants + initializer_proto->set_name(node_name); + // TODO(kellens): convert to fp16 if needed. + initializer_proto->set_data_type(TensorProto_DataType_FLOAT); const NDArray nd = shared_buffer->find(node_name)->second; const TBlob& blob = nd.data(); const TShape shape = blob.shape_; - const int32_t size = shape.Size(); + for (auto& dim : shape) { + initializer_proto->add_dims(static_cast(dim)); + } + + auto size = shape.Size(); + // TODO(kellens): Note hard coded float32 size assumed. std::shared_ptr shared_data_ptr(new float[size]); float* const data_ptr = shared_data_ptr.get(); nd.SyncCopyToCPU(static_cast(data_ptr), size); - AttributeProto* const tensor_attr = node_proto->add_attribute(); - tensor_attr->set_name("value"); - tensor_attr->set_type(AttributeProto::TENSOR); - - TensorProto* const tensor_proto = tensor_attr->mutable_t(); - tensor_proto->set_data_type(TensorProto_DataType_FLOAT); - for (auto& dim : shape) { - tensor_proto->add_dims(static_cast(dim)); + for (int blob_idx = 0; blob_idx < size; ++blob_idx) { + initializer_proto->add_float_data(data_ptr[blob_idx]); } - for (int blob_idx = 0; blob_idx < size; ++blob_idx) { - tensor_proto->add_float_data(data_ptr[blob_idx]); + // Create inputs for constants. + ValueInfoProto* const input_proto = graph_proto->add_input(); + input_proto->set_name(node_name); + + // TODO(kellens): (fp16 support) + input_proto->mutable_type()->mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); + for (auto& dim : shape) { + auto new_dim = input_proto->mutable_type()->mutable_tensor_type()->mutable_shape()->add_dim(); + new_dim->set_dim_value(static_cast(dim)); } }