From 83797400128d41910d87e957131f29fd466f4777 Mon Sep 17 00:00:00 2001 From: Sam Skalicky Date: Tue, 1 Sep 2020 13:26:09 -0700 Subject: [PATCH] MXNet Extensions enhancements2 (#19016) * initial commit * split lib_api.h into lib_api.cc, updated examples for 2.0/gluon * fixed licenses * whitespace * whitespace * modernize * fix modernize * fix modernize * fix modernize * fixed move * added lib_api.cc to CMakeLists.txt for example libs * working example * remove GLOBAL to fix protobuf issue * fixed library unload * added test target * fixed sanity * changed destructor to default * added /LD option for customop_gpu_lib target * moved /LD inside the <> * diff compile flags for relu_lib.cu and lib_api.cc * set CMAKE_VERBOSE_MAKEFILE for debug * added -v to ninja * added /MT * another try * changed /MT to -MT * set flags for cxx separately * split /LD /MT flags * refactored cuda APIs into header file * removed debugging stuff * updated instructions for gpu build * moved building into cmakelists * moved build stuff into separate CMakeLists.txt * fixed gpu example * fixed license * added dlmc library dependency * added nnvm dependency * removed nnvm dmlc dependencies, added WINDOWS_EXPORT_ALL_SYMBOLS option * fixed WINDOWS_EXPORT_ALL_SYMBOLS * changed nnvm to shared library * backed out external ops changes * split relu example into separate files to test separate lib_api.h/cc * sanity * addressed initial review items Co-authored-by: Ubuntu --- CMakeLists.txt | 19 +- config/linux_gpu.cmake | 16 +- example/extensions/lib_api/Makefile | 2 +- example/extensions/lib_api/init_lib.cc | 2 +- example/extensions/lib_custom_op/Makefile | 11 +- example/extensions/lib_custom_op/gemm_lib.cc | 2 +- example/extensions/lib_custom_op/relu_lib.cc | 171 ++ example/extensions/lib_custom_op/relu_lib.cu | 173 +- example/extensions/lib_custom_op/relu_lib.h | 90 + .../lib_custom_op/test_transposecsr.py | 24 +- .../lib_custom_op/test_transposerowsp.py | 21 +- .../lib_custom_op/transposecsr_lib.cc | 
2 +- .../lib_custom_op/transposerowsp_lib.cc | 2 +- example/extensions/lib_pass/Makefile | 2 +- example/extensions/lib_pass/pass_lib.cc | 2 +- example/extensions/lib_subgraph/Makefile | 2 +- .../extensions/lib_subgraph/subgraph_lib.cc | 2 +- include/mxnet/c_api.h | 2 +- include/mxnet/lib_api.h | 1517 ++-------------- python/mxnet/library.py | 19 +- src/c_api/c_api.cc | 20 +- src/initialize.cc | 12 +- src/lib_api.cc | 1593 +++++++++++++++++ 23 files changed, 2150 insertions(+), 1556 deletions(-) create mode 100644 example/extensions/lib_custom_op/relu_lib.cc create mode 100644 example/extensions/lib_custom_op/relu_lib.h create mode 100644 src/lib_api.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index a48a63032098..fc543d4c7734 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -722,19 +722,20 @@ endif() target_compile_definitions(mxnet PUBLIC DMLC_LOG_FATAL_THROW=$) # extension libraries (custom operators, custom subgraphs) are built by default -add_library(customop_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/gemm_lib.cc) -add_library(transposecsr_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposecsr_lib.cc) -add_library(transposerowsp_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposerowsp_lib.cc) -add_library(subgraph_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_subgraph/subgraph_lib.cc) -add_library(pass_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_pass/pass_lib.cc) +add_library(customop_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/gemm_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc) +add_library(transposecsr_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposecsr_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc) +add_library(transposerowsp_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposerowsp_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc) 
+add_library(subgraph_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_subgraph/subgraph_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc) +add_library(pass_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_pass/pass_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc) + target_include_directories(customop_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet) target_include_directories(transposecsr_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet) target_include_directories(transposerowsp_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet) target_include_directories(subgraph_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet) target_include_directories(pass_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet) if(USE_CUDA) - add_library(customop_gpu_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cu) - target_include_directories(customop_gpu_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet) + add_library(customop_gpu_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cu ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc) + target_include_directories(customop_gpu_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op) endif() if(UNIX) if (USE_CUDA) @@ -752,7 +753,9 @@ elseif(MSVC) set_target_properties(subgraph_lib PROPERTIES PREFIX "lib") set_target_properties(pass_lib PROPERTIES PREFIX "lib") if(USE_CUDA) - target_compile_options(customop_gpu_lib PUBLIC "$<$:-Xcompiler=-fPIC>") + target_compile_options(customop_gpu_lib PRIVATE "$<$:-Xcompiler=-LD -MT>") + target_compile_options(customop_gpu_lib PRIVATE "$<$:/LD>") + target_compile_options(customop_gpu_lib PRIVATE "$<$:/MT>") set_target_properties(customop_gpu_lib PROPERTIES PREFIX "lib") endif() endif() diff --git a/config/linux_gpu.cmake b/config/linux_gpu.cmake index 442ac6cb3578..c75d2947d421 
100644 --- a/config/linux_gpu.cmake +++ b/config/linux_gpu.cmake @@ -24,7 +24,8 @@ # # $ cp config/linux_gpu.cmake config.cmake # -# Next modify the according entries, and then compile by +# Next modify the entries in the config.cmake like MXNET_CUDA_ARCH to set the specific +# GPU architecture, and then compile by # # $ mkdir build; cd build # $ cmake .. @@ -42,15 +43,18 @@ set(USE_CUDA ON CACHE BOOL "Build with CUDA support") set(USE_CUDNN ON CACHE BOOL "Build with cudnn support, if found") # Target NVIDIA GPU achitecture. -# Valid options are "Auto" for autodetection, "All" for all available -# architectures or a list of architectures by compute capability number, such as -# "7.0" or "7.0;7.5" as well as name, such as "Volta" or "Volta;Turing". +# Valid options are: +# - "Auto" for autodetection, will try and discover which GPU architecture to use by +# looking at the available GPUs on the machine that you're building on +# - "All" for all available GPU architectures supported by the version of CUDA installed +# - "specific GPU architectures" by giving the compute capability number such as +# "7.0" or "7.0;7.5" (ie. sm_70 or sm_75) or you can specify the name like: +# "Volta" or "Volta;Turing", be sure not to use quotes (ie. just set to 7.0) # The value specified here is passed to cmake's CUDA_SELECT_NVCC_ARCH_FLAGS to # obtain the compilation flags for nvcc. # # When compiling on a machine without GPU, autodetection will fail and you -# should instead specify the target architecture manually to avoid excessive -# compilation times. +# should instead specify the target architecture manually. set(MXNET_CUDA_ARCH "Auto" CACHE STRING "Target NVIDIA GPU achitecture") #--------------------------------------------- diff --git a/example/extensions/lib_api/Makefile b/example/extensions/lib_api/Makefile index cb529390b77f..e71e4d8ac114 100644 --- a/example/extensions/lib_api/Makefile +++ b/example/extensions/lib_api/Makefile @@ -16,7 +16,7 @@ # under the License. 
all: - g++ -std=c++11 -shared -fPIC init_lib.cc -o libinit_lib.so -I ../../../include/mxnet + g++ -std=c++11 -shared -fPIC init_lib.cc ../../../src/lib_api.cc -o libinit_lib.so -I ../../../include test: g++ -std=c++11 -O3 -o libtest libtest.cc -ldl -I ../../../include/mxnet diff --git a/example/extensions/lib_api/init_lib.cc b/example/extensions/lib_api/init_lib.cc index 0ed43761fe53..a21c481bee2f 100644 --- a/example/extensions/lib_api/init_lib.cc +++ b/example/extensions/lib_api/init_lib.cc @@ -24,7 +24,7 @@ */ #include -#include "lib_api.h" +#include "mxnet/lib_api.h" using namespace mxnet::ext; diff --git a/example/extensions/lib_custom_op/Makefile b/example/extensions/lib_custom_op/Makefile index feded2947ca3..97dabf8a0759 100644 --- a/example/extensions/lib_custom_op/Makefile +++ b/example/extensions/lib_custom_op/Makefile @@ -18,16 +18,19 @@ all: gemm_lib relu_lib transposecsr_lib transposerowsp_lib gemm_lib: - g++ -shared -fPIC -std=c++11 gemm_lib.cc -o libgemm_lib.so -I ../../../include/mxnet + g++ -shared -fPIC -std=c++11 gemm_lib.cc ../../../src/lib_api.cc -o libgemm_lib.so -I ../../../include relu_lib: - nvcc -shared -std=c++11 -Xcompiler -fPIC relu_lib.cu -o librelu_lib.so -I ../../../include/mxnet + g++ -fPIC -c -std=c++11 relu_lib.cc -o relu_lib.cc.o -I ../../../include + g++ -fPIC -c -std=c++11 ../../../src/lib_api.cc -o lib_api.cc.o -I ../../../include + nvcc -c -std=c++11 -Xcompiler -fPIC relu_lib.cu -o relu_lib.cu.o -I ../../../include + nvcc -shared relu_lib.cc.o lib_api.cc.o relu_lib.cu.o -o librelu_lib.so transposecsr_lib: - g++ -shared -fPIC -std=c++11 transposecsr_lib.cc -o libtransposecsr_lib.so -I ../../../include/mxnet + g++ -shared -fPIC -std=c++11 transposecsr_lib.cc ../../../src/lib_api.cc -o libtransposecsr_lib.so -I ../../../include transposerowsp_lib: - g++ -shared -fPIC -std=c++11 transposerowsp_lib.cc -o libtransposerowsp_lib.so -I ../../../include/mxnet + g++ -shared -fPIC -std=c++11 transposerowsp_lib.cc ../../../src/lib_api.cc 
-o libtransposerowsp_lib.so -I ../../../include clean: rm -rf libgemm_lib.so librelu_lib.so libtransposecsr_lib.so libtransposerowsp_lib.so diff --git a/example/extensions/lib_custom_op/gemm_lib.cc b/example/extensions/lib_custom_op/gemm_lib.cc index 6081713cda67..164ac014f922 100644 --- a/example/extensions/lib_custom_op/gemm_lib.cc +++ b/example/extensions/lib_custom_op/gemm_lib.cc @@ -25,7 +25,7 @@ #include #include -#include "lib_api.h" +#include "mxnet/lib_api.h" using namespace mxnet::ext; diff --git a/example/extensions/lib_custom_op/relu_lib.cc b/example/extensions/lib_custom_op/relu_lib.cc new file mode 100644 index 000000000000..4d9533d85465 --- /dev/null +++ b/example/extensions/lib_custom_op/relu_lib.cc @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2020 by Contributors + * \file relu_lib.cu + * \brief simple custom relu and noisy relu operator implemented using CUDA function + */ + +#include +#include "relu_lib.h" + +using namespace mxnet::ext; + +MXReturnValue parseAttrs(const std::unordered_map& attrs, + int* num_in, int* num_out) { + *num_in = 1; + *num_out = 1; + return MX_SUCCESS; +} + +MXReturnValue inferType(const std::unordered_map& attrs, + std::vector* intypes, + std::vector* outtypes) { + outtypes->at(0) = intypes->at(0); + return MX_SUCCESS; +} + +MXReturnValue inferShape(const std::unordered_map& attrs, + std::vector>* inshapes, + std::vector>* outshapes) { + outshapes->at(0) = inshapes->at(0); + return MX_SUCCESS; +} + +MXReturnValue forwardCPU(const std::unordered_map& attrs, + std::vector* inputs, + std::vector* outputs, + const OpResource& res) { + float* in_data = inputs->at(0).data(); + float* out_data = outputs->at(0).data(); + for (int i=0; iat(0).size(); i++) { + out_data[i] = in_data[i] > 0 ? in_data[i] : 0; + } + return MX_SUCCESS; +} + +MXReturnValue backwardCPU(const std::unordered_map& attrs, + std::vector* inputs, + std::vector* outputs, + const OpResource& res) { + float* out_grad = inputs->at(0).data(); + float* in_data = inputs->at(1).data(); + float* in_grad = outputs->at(0).data(); + for (int i=0; iat(1).size(); i++) { + in_grad[i] = in_data[i] > 0 ? 
1 * out_grad[i] : 0; + } + return MX_SUCCESS; +} + +REGISTER_OP(my_relu) +.setParseAttrs(parseAttrs) +.setInferType(inferType) +.setInferShape(inferShape) +.setForward(forwardCPU, "cpu") +.setForward(forwardGPU, "gpu") +.setBackward(backwardCPU, "cpu") +.setBackward(backwardGPU, "gpu"); + + +MyStatefulReluCPU::MyStatefulReluCPU(const std::unordered_map& attrs) + : attrs_(attrs) {} + +MXReturnValue MyStatefulReluCPU::Forward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res) { + return forwardCPU(attrs_, inputs, outputs, op_res); +} + +MXReturnValue MyStatefulReluCPU::Backward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res) { + return backwardCPU(attrs_, inputs, outputs, op_res); +} + +MyStatefulReluGPU::MyStatefulReluGPU(const std::unordered_map& attrs) + : attrs_(attrs) {} + +MXReturnValue MyStatefulReluGPU::Forward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res) { + return forwardGPU(attrs_, inputs, outputs, op_res); +} + +MXReturnValue MyStatefulReluGPU::Backward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res) { + return backwardGPU(attrs_, inputs, outputs, op_res); +} + + +MXReturnValue createOpStateCPU(const std::unordered_map& attrs, + CustomStatefulOp** op_inst) { + *op_inst = new MyStatefulReluCPU(attrs); + return MX_SUCCESS; +} + +MXReturnValue createOpStateGPU(const std::unordered_map& attrs, + CustomStatefulOp** op_inst) { + *op_inst = new MyStatefulReluGPU(attrs); + return MX_SUCCESS; +} + +REGISTER_OP(my_state_relu) +.setParseAttrs(parseAttrs) +.setInferType(inferType) +.setInferShape(inferShape) +.setCreateOpState(createOpStateCPU, "cpu") +.setCreateOpState(createOpStateGPU, "gpu"); + +MXReturnValue noisyForwardCPU(const std::unordered_map& attrs, + std::vector* inputs, + std::vector* outputs, + const OpResource& res) { + float* in_data = inputs->at(0).data(); + float* out_data = outputs->at(0).data(); + + mx_cpu_rand_t* states = res.get_cpu_rand_states(); 
+ std::normal_distribution dist_normal; + + for (int i=0; iat(0).size(); ++i) { + float noise = dist_normal(*states); + out_data[i] = in_data[i] + noise > 0 ? in_data[i] + noise : 0; + } + return MX_SUCCESS; +} + +REGISTER_OP(my_noisy_relu) +.setParseAttrs(parseAttrs) +.setInferType(inferType) +.setInferShape(inferShape) +.setForward(noisyForwardCPU, "cpu") +.setForward(noisyForwardGPU, "gpu") +.setBackward(backwardCPU, "cpu") +.setBackward(backwardGPU, "gpu"); + +MXReturnValue initialize(int version) { + if (version >= 20000) { + std::cout << "MXNet version " << version << " supported" << std::endl; + return MX_SUCCESS; + } else { + MX_ERROR_MSG << "MXNet version " << version << " not supported"; + return MX_FAIL; + } +} diff --git a/example/extensions/lib_custom_op/relu_lib.cu b/example/extensions/lib_custom_op/relu_lib.cu index 7022c76e6999..c309274e61c6 100644 --- a/example/extensions/lib_custom_op/relu_lib.cu +++ b/example/extensions/lib_custom_op/relu_lib.cu @@ -24,49 +24,16 @@ */ #include -#include "lib_api.h" +#include "relu_lib.h" using namespace mxnet::ext; -#define NumThreadPerBlock 256 // mxnet recommended cuda thread number per block - __global__ void relu_gpu_forward(float *out, float *in, int64_t N) { int tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < N) out[tid] = in[tid] > 0 ? in[tid] : 0; } -__global__ void relu_gpu_backward(float *ingrad, float *outgrad, float *indata, int64_t N) { - int tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < N) - ingrad[tid] = indata[tid] > 0 ? 1 * outgrad[tid] : 0; -} - -MXReturnValue forwardCPU(const std::unordered_map& attrs, - std::vector* inputs, - std::vector* outputs, - const OpResource& res) { - float* in_data = inputs->at(0).data(); - float* out_data = outputs->at(0).data(); - for (int i=0; iat(0).size(); i++) { - out_data[i] = in_data[i] > 0 ? 
in_data[i] : 0; - } - return MX_SUCCESS; -} - -MXReturnValue backwardCPU(const std::unordered_map& attrs, - std::vector* inputs, - std::vector* outputs, - const OpResource& res) { - float* out_grad = inputs->at(0).data(); - float* in_data = inputs->at(1).data(); - float* in_grad = outputs->at(0).data(); - for (int i=0; iat(1).size(); i++) { - in_grad[i] = in_data[i] > 0 ? 1 * out_grad[i] : 0; - } - return MX_SUCCESS; -} - MXReturnValue forwardGPU(const std::unordered_map& attrs, std::vector* inputs, std::vector* outputs, @@ -83,6 +50,12 @@ MXReturnValue forwardGPU(const std::unordered_map& att return MX_SUCCESS; } +__global__ void relu_gpu_backward(float *ingrad, float *outgrad, float *indata, int64_t N) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < N) + ingrad[tid] = indata[tid] > 0 ? 1 * outgrad[tid] : 0; +} + MXReturnValue backwardGPU(const std::unordered_map& attrs, std::vector* inputs, std::vector* outputs, @@ -99,102 +72,6 @@ MXReturnValue backwardGPU(const std::unordered_map& at return MX_SUCCESS; } -MXReturnValue parseAttrs(const std::unordered_map& attrs, - int* num_in, int* num_out) { - *num_in = 1; - *num_out = 1; - return MX_SUCCESS; -} - -MXReturnValue inferType(const std::unordered_map& attrs, - std::vector* intypes, - std::vector* outtypes) { - outtypes->at(0) = intypes->at(0); - return MX_SUCCESS; -} - -MXReturnValue inferShape(const std::unordered_map& attrs, - std::vector>* inshapes, - std::vector>* outshapes) { - outshapes->at(0) = inshapes->at(0); - return MX_SUCCESS; -} - -REGISTER_OP(my_relu) -.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferShape(inferShape) -.setForward(forwardCPU, "cpu") -.setForward(forwardGPU, "gpu") -.setBackward(backwardCPU, "cpu") -.setBackward(backwardGPU, "gpu"); - -class MyStatefulReluCPU : public CustomStatefulOp { - public: - explicit MyStatefulReluCPU(const std::unordered_map& attrs) - : attrs_(attrs) {} - MXReturnValue Forward(std::vector* inputs, - std::vector* outputs, - const 
OpResource& op_res) { - return forwardCPU(attrs_, inputs, outputs, op_res); - } - MXReturnValue Backward(std::vector* inputs, - std::vector* outputs, - const OpResource& op_res) { - return backwardCPU(attrs_, inputs, outputs, op_res); - } - ~MyStatefulReluCPU() {} - private: - const std::unordered_map attrs_; -}; - -class MyStatefulReluGPU : public CustomStatefulOp { - public: - explicit MyStatefulReluGPU(const std::unordered_map& attrs) - : attrs_(attrs) {} - MXReturnValue Forward(std::vector* inputs, - std::vector* outputs, - const OpResource& op_res) { - return forwardGPU(attrs_, inputs, outputs, op_res); - } - MXReturnValue Backward(std::vector* inputs, - std::vector* outputs, - const OpResource& op_res) { - return backwardGPU(attrs_, inputs, outputs, op_res); - } - ~MyStatefulReluGPU() {} - private: - const std::unordered_map attrs_; -}; - -MXReturnValue createOpStateCPU(const std::unordered_map& attrs, - CustomStatefulOp** op_inst) { - *op_inst = new MyStatefulReluCPU(attrs); - return MX_SUCCESS; -} - -MXReturnValue createOpStateGPU(const std::unordered_map& attrs, - CustomStatefulOp** op_inst) { - *op_inst = new MyStatefulReluGPU(attrs); - return MX_SUCCESS; -} - -REGISTER_OP(my_state_relu) -.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferShape(inferShape) -.setCreateOpState(createOpStateCPU, "cpu") -.setCreateOpState(createOpStateGPU, "gpu"); - -/* - * Below is noisy ReLU operator example - * noisy ReLU is made from ReLU extended to include Gaussian noise - * forward - add Gaussian noise generated from normal distribution to each unit - * backward - gradient doesn't need to change since noise is constant - */ - -#define NumRandomPerThread 64 // mxnet recommended random numbers generated per thread - __global__ void noisy_relu_gpu_forward(float *out, float *in, int64_t N, mx_gpu_rand_t* states, int step) { // the launcher logic ensures tid less than NumGPURandomStates int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -209,23 +86,6 @@ 
__global__ void noisy_relu_gpu_forward(float *out, float *in, int64_t N, mx_gpu_ } } -MXReturnValue noisyForwardCPU(const std::unordered_map& attrs, - std::vector* inputs, - std::vector* outputs, - const OpResource& res) { - float* in_data = inputs->at(0).data(); - float* out_data = outputs->at(0).data(); - - mx_cpu_rand_t* states = res.get_cpu_rand_states(); - std::normal_distribution dist_normal; - - for (int i=0; iat(0).size(); ++i) { - float noise = dist_normal(*states); - out_data[i] = in_data[i] + noise > 0 ? in_data[i] + noise : 0; - } - return MX_SUCCESS; -} - MXReturnValue noisyForwardGPU(const std::unordered_map& attrs, std::vector* inputs, std::vector* outputs, @@ -250,22 +110,3 @@ MXReturnValue noisyForwardGPU(const std::unordered_map return MX_SUCCESS; } - -REGISTER_OP(my_noisy_relu) -.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferShape(inferShape) -.setForward(noisyForwardCPU, "cpu") -.setForward(noisyForwardGPU, "gpu") -.setBackward(backwardCPU, "cpu") -.setBackward(backwardGPU, "gpu"); - -MXReturnValue initialize(int version) { - if (version >= 10700) { - std::cout << "MXNet version " << version << " supported" << std::endl; - return MX_SUCCESS; - } else { - MX_ERROR_MSG << "MXNet version " << version << " not supported"; - return MX_FAIL; - } -} diff --git a/example/extensions/lib_custom_op/relu_lib.h b/example/extensions/lib_custom_op/relu_lib.h new file mode 100644 index 000000000000..5aadfe930340 --- /dev/null +++ b/example/extensions/lib_custom_op/relu_lib.h @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2020 by Contributors + * \file relu_lib.cu + * \brief simple custom relu and noisy relu operator implemented using CUDA function + */ + +#ifndef __EXAMPLE__RELU_LIB_H__ +#define __EXAMPLE__RELU_LIB_H__ + +#include +#include "mxnet/lib_api.h" + +using namespace mxnet::ext; + +#define NumThreadPerBlock 256 // mxnet recommended cuda thread number per block +#define NumRandomPerThread 64 // mxnet recommended random numbers generated per thread + +class MyStatefulReluCPU : public CustomStatefulOp { + public: + explicit MyStatefulReluCPU(const std::unordered_map& attrs); + + MXReturnValue Forward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res); + MXReturnValue Backward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res); + + private: + const std::unordered_map attrs_; +}; + +class MyStatefulReluGPU : public CustomStatefulOp { + public: + explicit MyStatefulReluGPU(const std::unordered_map& attrs); + + MXReturnValue Forward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res); + + MXReturnValue Backward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res); + + private: + const std::unordered_map attrs_; +}; + +MXReturnValue forwardGPU(const std::unordered_map& attrs, + std::vector* inputs, + std::vector* outputs, + const OpResource& res); + +MXReturnValue backwardGPU(const std::unordered_map& attrs, + std::vector* inputs, + std::vector* outputs, + const OpResource& res); + +/* + * Below is noisy ReLU operator example + * noisy ReLU is made 
from ReLU extended to include Gaussian noise + * forward - add Gaussian noise generated from normal distribution to each unit + * backward - gradient doesn't need to change since noise is constant + */ + +MXReturnValue noisyForwardGPU(const std::unordered_map& attrs, + std::vector* inputs, + std::vector* outputs, + const OpResource& res); + +#endif diff --git a/example/extensions/lib_custom_op/test_transposecsr.py b/example/extensions/lib_custom_op/test_transposecsr.py index 37d066a7bec2..5f670aeedd9b 100644 --- a/example/extensions/lib_custom_op/test_transposecsr.py +++ b/example/extensions/lib_custom_op/test_transposecsr.py @@ -54,25 +54,25 @@ print("indices:", c.indices.asnumpy()) print("indptr:", c.indptr.asnumpy()) -print("--------start symbolic compute--------") +print("--------start Gluon compute--------") d = mx.sym.Variable('d') e = mx.sym.my_transposecsr(d) f = mx.sym.my_state_transposecsr(d, test_kw=200) -exe = e.bind(ctx=mx.cpu(),args={'d':a}) -exe2 = f.bind(ctx=mx.cpu(),args={'d':a}) -out = exe.forward() +block = mx.gluon.nn.SymbolBlock(e, [d]) +out = block(a) print("Compute Results:") -print("data:", out[0].data.asnumpy()) -print("indices:", out[0].indices.asnumpy()) -print("indptr:", out[0].indptr.asnumpy()) +print("data:", out.data.asnumpy()) +print("indices:", out.indices.asnumpy()) +print("indptr:", out.indptr.asnumpy()) -out2 = exe2.forward() -out2 = exe2.forward() +block2 = mx.gluon.nn.SymbolBlock(f,[d]) +out2 = block2(a) +out2 = block2(a) print("Stateful Compute Result:") -print("data:", out2[0].data.asnumpy()) -print("indices:", out2[0].indices.asnumpy()) -print("indptr:", out2[0].indptr.asnumpy()) +print("data:", out2.data.asnumpy()) +print("indices:", out2.indices.asnumpy()) +print("indptr:", out2.indptr.asnumpy()) print("--------Baseline(dense)--------") print(mx.nd.transpose(a.tostype('default'))) diff --git a/example/extensions/lib_custom_op/test_transposerowsp.py b/example/extensions/lib_custom_op/test_transposerowsp.py index 
cea62ec6e98c..ef51deaba24a 100644 --- a/example/extensions/lib_custom_op/test_transposerowsp.py +++ b/example/extensions/lib_custom_op/test_transposerowsp.py @@ -51,23 +51,24 @@ print("data:", c.data.asnumpy()) print("indices:", c.indices.asnumpy()) -print("--------start symbolic compute--------") +print("--------start Gluon compute--------") d = mx.sym.Variable('d') e = mx.sym.my_transposerowsp(d) f = mx.sym.my_state_transposerowsp(d, test_kw=200) -exe = e.bind(ctx=mx.cpu(),args={'d':a}) -exe2 = f.bind(ctx=mx.cpu(),args={'d':a}) -out = exe.forward() +block = mx.gluon.nn.SymbolBlock(e,[d]) +out = block(a) print("Compute Results:") -print("data:", out[0].data.asnumpy()) -print("indices:", out[0].indices.asnumpy()) +print(out) +print("data:", out.data.asnumpy()) +print("indices:", out.indices.asnumpy()) -out2 = exe2.forward() -out2 = exe2.forward() +block2 = mx.gluon.nn.SymbolBlock(f,[d]) +out2 = block2(a) +out2 = block2(a) print("Stateful Compute Result:") -print("data:", out2[0].data.asnumpy()) -print("indices:", out2[0].indices.asnumpy()) +print("data:", out2.data.asnumpy()) +print("indices:", out2.indices.asnumpy()) print("--------Baseline(dense)--------") print(mx.nd.transpose(a.tostype('default'))) diff --git a/example/extensions/lib_custom_op/transposecsr_lib.cc b/example/extensions/lib_custom_op/transposecsr_lib.cc index 0a882f4d2517..2ef85c4b46eb 100644 --- a/example/extensions/lib_custom_op/transposecsr_lib.cc +++ b/example/extensions/lib_custom_op/transposecsr_lib.cc @@ -25,7 +25,7 @@ #include #include -#include "lib_api.h" +#include "mxnet/lib_api.h" using namespace mxnet::ext; diff --git a/example/extensions/lib_custom_op/transposerowsp_lib.cc b/example/extensions/lib_custom_op/transposerowsp_lib.cc index cb4592239ef9..72b9b312566a 100644 --- a/example/extensions/lib_custom_op/transposerowsp_lib.cc +++ b/example/extensions/lib_custom_op/transposerowsp_lib.cc @@ -25,7 +25,7 @@ #include #include -#include "lib_api.h" +#include "mxnet/lib_api.h" using 
namespace mxnet::ext; diff --git a/example/extensions/lib_pass/Makefile b/example/extensions/lib_pass/Makefile index 759a08c48c89..e555b191ecf5 100644 --- a/example/extensions/lib_pass/Makefile +++ b/example/extensions/lib_pass/Makefile @@ -18,7 +18,7 @@ all: pass_lib pass_lib: - g++ -shared -fPIC -std=c++11 pass_lib.cc -o libpass_lib.so -I ../../../include/mxnet + g++ -shared -fPIC -std=c++11 pass_lib.cc ../../../src/lib_api.cc -o libpass_lib.so -I ../../../include clean: rm -rf libpass_lib.so diff --git a/example/extensions/lib_pass/pass_lib.cc b/example/extensions/lib_pass/pass_lib.cc index 825d38290936..fb9a2d42f8d3 100644 --- a/example/extensions/lib_pass/pass_lib.cc +++ b/example/extensions/lib_pass/pass_lib.cc @@ -26,7 +26,7 @@ #include #include #include -#include "lib_api.h" +#include "mxnet/lib_api.h" using namespace mxnet::ext; diff --git a/example/extensions/lib_subgraph/Makefile b/example/extensions/lib_subgraph/Makefile index c45100b69ef7..5449e3af9c58 100644 --- a/example/extensions/lib_subgraph/Makefile +++ b/example/extensions/lib_subgraph/Makefile @@ -18,7 +18,7 @@ all: subgraph_lib subgraph_lib: - g++ -shared -fPIC -std=c++11 subgraph_lib.cc -o libsubgraph_lib.so -I ../../../include/mxnet + g++ -shared -fPIC -std=c++11 subgraph_lib.cc ../../../src/lib_api.cc -o libsubgraph_lib.so -I ../../../include clean: rm -rf libsubgraph_lib.so diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index b2b5a74f2d0a..1f39345cc460 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -27,7 +27,7 @@ #include #include #include -#include "lib_api.h" +#include "mxnet/lib_api.h" using namespace mxnet::ext; diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 1f900dd6f0d3..0087e74227d7 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -242,7 +242,7 @@ MXNET_DLL const char *MXGetLastError(); * \param 0 for quiet, 1 for 
verbose * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXLoadLib(const char *path, unsigned verbose); +MXNET_DLL int MXLoadLib(const char *path, unsigned verbose, void** lib); /*! * \brief Get list of features supported on the runtime diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 3367bc661c12..57267d420a33 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -48,11 +48,12 @@ #include #if defined(__NVCC__) + #include #include #endif /* Make sure to update the version number everytime you make changes */ -#define MX_LIBRARY_VERSION 8 +#define MX_LIBRARY_VERSION 9 /*! * \brief For loading multiple custom op libraries in Linux, exporting same symbol multiple @@ -216,6 +217,33 @@ extern "C" { namespace mxnet { namespace ext { +/* \brief Class to store error messages from extensions to pass to MXNet */ +class MXerrorMsgs { + public: + /* \brief get singleton pointer to class */ + static MXerrorMsgs* get(); + + /* \brief add a new error message */ + std::stringstream& add(const char* file, int line); + + /* \brief return number of error messages */ + int size(); + + /* \brief get error message at index */ + const std::string* get(int idx); + + private: + /*! \brief constructor */ + MXerrorMsgs() {} + /*! \brief destructor */ + ~MXerrorMsgs() {} + /*! \brief map of entries in registry */ + std::vector messages; +}; + +// Add a new error message, example: MX_ERROR_MSG << "my error msg"; +#define MX_ERROR_MSG MXerrorMsgs::get()->add(__FILE__, __LINE__) + /*! 
* \brief Tensor data type, consistent with mshadow data type */ @@ -248,15 +276,13 @@ enum MXStorageType { * dev_id is the device index where the tensor locates */ struct MXContext { - MXContext() : dev_type("error"), dev_id(-1) {} - explicit MXContext(std::string dev_type_, int dev_id_) - : dev_type(dev_type_), dev_id(dev_id_) {} - explicit MXContext(const char* dev_type_, int dev_id_) - : dev_type(dev_type_), dev_id(dev_id_) {} - static MXContext CPU() { return MXContext("cpu", 0); } - static MXContext GPU() { return MXContext("gpu", 0); } - static MXContext CPU(int dev_id) { return MXContext("cpu", dev_id); } - static MXContext GPU(int dev_id) { return MXContext("gpu", dev_id); } + MXContext(); + explicit MXContext(std::string dev_type_, int dev_id_); + explicit MXContext(const char* dev_type_, int dev_id_); + static MXContext CPU(); + static MXContext GPU(); + static MXContext CPU(int dev_id); + static MXContext GPU(int dev_id); std::string dev_type; int dev_id; @@ -286,114 +312,24 @@ struct MXSparse { int64_t indptr_len; void set(void *data_ptr, const int64_t* dims, int ndims, void *idx, - int64_t num_idx, void *idx_ptr = nullptr, int64_t num_idx_ptr = 0) { - data = data_ptr; - // If CSR, num of non-zero elemets is num_idx, - // If row sparse, num of elements is num_idx * width. - data_len = num_idx; - if (!idx_ptr) { - for (int i = 1; i < ndims; ++i) - data_len *= dims[i]; - } - - indices = reinterpret_cast(idx); - indices_len = num_idx; - - if (idx_ptr) { - indptr = reinterpret_cast(idx_ptr); - indptr_len = num_idx_ptr; - } - } + int64_t num_idx, void *idx_ptr = nullptr, int64_t num_idx_ptr = 0); }; /*! 
* \brief Tensor data structure used by custom operator */ struct MXTensor { - MXTensor() : data_ptr(nullptr), dtype(kUNSET), verID(0), stype(kDefaultStorage) {} - MXTensor(const MXTensor& oth) : data_ptr(oth.data_ptr), shape(oth.shape), - dtype(oth.dtype), verID(oth.verID), ctx(oth.ctx), stype(oth.stype) { - setDLTensor(); - } - MXTensor(void *data_ptr, const std::vector &shape, MXDType dtype, - size_t vID, MXContext mx_ctx, MXStorageType stype = kDefaultStorage) - : data_ptr(data_ptr), shape(shape), dtype(dtype), verID(vID), ctx(mx_ctx), stype(stype) { - setDLTensor(); - } + MXTensor(); + MXTensor(const MXTensor& oth); + MXTensor(void *data_ptr, std::vector shape, MXDType dtype, + size_t vID, MXContext mx_ctx, MXStorageType stype = kDefaultStorage); /*! \brief populate internal tensor fields */ void setTensor(void *dptr, MXDType type, const int64_t* dims, int ndims, - size_t vID, MXContext mx_ctx, MXStorageType storage_type) { - data_ptr = dptr; dtype = type; verID = vID; ctx = mx_ctx; stype = storage_type; - shape.clear(); - for (int j = 0; j < ndims; j++) { - shape.push_back(dims[j]); - } - setDLTensor(); - } + size_t vID, MXContext mx_ctx, MXStorageType storage_type); /*! 
\brief populate DLTensor fields */ - void setDLTensor() { - dltensor.data = data_ptr; - dltensor.ndim = shape.size(); - dltensor.shape = const_cast(shape.data()); - dltensor.strides = nullptr; - dltensor.byte_offset = 0; - dltensor.dtype.lanes = 1; - dltensor.ctx.device_id = ctx.dev_id; - if (ctx.dev_type == "cpu") - dltensor.ctx.device_type = kDLCPU; - else if (ctx.dev_type == "gpu") - dltensor.ctx.device_type = kDLGPU; - else if (ctx.dev_type == "opencl") - dltensor.ctx.device_type = kDLOpenCL; - else if (ctx.dev_type == "vulcan") - dltensor.ctx.device_type = kDLVulkan; - else if (ctx.dev_type == "metal") - dltensor.ctx.device_type = kDLMetal; - else if (ctx.dev_type == "vpi") - dltensor.ctx.device_type = kDLVPI; - else if (ctx.dev_type == "rocm") - dltensor.ctx.device_type = kDLROCM; - else - dltensor.ctx.device_type = kDLExtDev; - switch (dtype) { - case kFloat32: - dltensor.dtype.code = kDLFloat; - dltensor.dtype.bits = 32; - break; - case kFloat64: - dltensor.dtype.code = kDLFloat; - dltensor.dtype.bits = 64; - break; - case kFloat16: - dltensor.dtype.code = kDLFloat; - dltensor.dtype.bits = 16; - break; - case kUint8: - dltensor.dtype.code = kDLUInt; - dltensor.dtype.bits = 8; - break; - case kInt32: - dltensor.dtype.code = kDLInt; - dltensor.dtype.bits = 32; - break; - case kInt8: - dltensor.dtype.code = kDLInt; - dltensor.dtype.bits = 8; - break; - case kInt64: - dltensor.dtype.code = kDLInt; - dltensor.dtype.bits = 64; - break; - default: - dltensor.dtype.code = 0; - dltensor.dtype.bits = 0; - throw std::runtime_error("Error! Invalid dtype flag: " - + std::to_string(static_cast(dtype)) - + " when constructing MXTensor"); - } - } + void setDLTensor(); /*! \brief helper function to cast data pointer */ template @@ -402,24 +338,10 @@ struct MXTensor { } /*! 
\brief helper function to get data size */ - inline int64_t size() const { - int64_t size = 1; - for (unsigned int i = 0; i < shape.size(); i++) { - size *= shape[i]; - } - return size; - } + int64_t size() const; /*! \brief helper function to compare two MXTensors */ - inline bool isSame(const MXTensor &oth) const { - return data_ptr == oth.data_ptr && - dtype == oth.dtype && - verID == oth.verID && - ctx.dev_type == oth.ctx.dev_type && - ctx.dev_id == oth.ctx.dev_id && - shape == oth.shape && - stype == oth.stype; - } + bool isSame(const MXTensor &oth) const; // For dense, data_ptr points to 1D flattened tensor data // For sparse, data_ptr points to MXSparse @@ -473,28 +395,15 @@ class PassResource { public: PassResource(std::unordered_map* new_args, std::unordered_map* new_aux, - nd_malloc_t nd_malloc, const void* nd_alloc) - : new_args_(new_args), new_aux_(new_aux), nd_malloc_(nd_malloc), nd_alloc_(nd_alloc) {} + nd_malloc_t nd_malloc, const void* nd_alloc); + // allocate new arg param, adds to args map, returns newly allocated tensor MXTensor* alloc_arg(const std::string& name, const std::vector& shapes, - const MXContext &ctx, MXDType dtype) const { - void* data; - nd_malloc_(nd_alloc_, shapes.data(), shapes.size(), ctx.dev_type.c_str(), ctx.dev_id, - dtype, name.c_str(), 1, &data); - MXTensor tensor(data, shapes, dtype, 0, ctx, kDefaultStorage); - (*new_args_)[name] = tensor; - return &(new_args_->at(name)); - } + const MXContext &ctx, MXDType dtype) const; + // allocate new aux param, adds to aux map, returns newly allocated tensor MXTensor* alloc_aux(const std::string& name, const std::vector& shapes, - const MXContext &ctx, MXDType dtype) const { - void* data; - nd_malloc_(nd_alloc_, shapes.data(), shapes.size(), ctx.dev_type.c_str(), ctx.dev_id, - dtype, name.c_str(), 0, &data); - MXTensor tensor(data, shapes, dtype, 0, ctx, kDefaultStorage); - (*new_aux_)[name] = tensor; - return &(new_aux_->at(name)); - } + const MXContext &ctx, MXDType dtype) const; 
private: std::unordered_map* new_args_; @@ -511,43 +420,30 @@ class OpResource { OpResource(xpu_malloc_t cpu_malloc_fp, void* cpu_alloc_fp, xpu_malloc_t gpu_malloc_fp, void* gpu_alloc_fp, void* stream, sparse_malloc_t sparse_malloc_fp, void* sparse_alloc_fp, - void* rng_cpu_states, void* rng_gpu_states) - : cpu_malloc(cpu_malloc_fp), gpu_malloc(gpu_malloc_fp), - cpu_alloc(cpu_alloc_fp), gpu_alloc(gpu_alloc_fp), cuda_stream(stream), - sparse_malloc(sparse_malloc_fp), sparse_alloc(sparse_alloc_fp), - rand_cpu_states(rng_cpu_states), rand_gpu_states(rng_gpu_states) {} + void* rng_cpu_states, void* rng_gpu_states); /*! \brief allocate cpu memory controlled by MXNet */ - void* alloc_cpu(int size) const { - return cpu_malloc(cpu_alloc, size); - } + void* alloc_cpu(int size) const; /*! \brief allocate gpu memory controlled by MXNet */ - void* alloc_gpu(int size) const { - return gpu_malloc(gpu_alloc, size); - } + void* alloc_gpu(int size) const; /*! \brief return the cuda stream object with correct type */ - mx_stream_t get_cuda_stream() const { + inline mx_stream_t get_cuda_stream() const { return static_cast(cuda_stream); } /*! \brief allocate sparse memory controlled by MXNet */ - void alloc_sparse(MXSparse* sparse, int index, int indices_len, int indptr_len = 0) const { - sparse_malloc(sparse_alloc, index, indices_len, indptr_len, - &(sparse->data), &(sparse->indices), &(sparse->indptr)); - } + void alloc_sparse(MXSparse* sparse, int index, int indices_len, int indptr_len = 0) const; /*! \brief get pointer to initialized and seeded random number states located on CPU */ /* Access each state by states[id], but this id should be <= MX_NUM_CPU_RANDOM_STATES */ - mx_cpu_rand_t* get_cpu_rand_states() const { - return static_cast(rand_cpu_states); - } + mx_cpu_rand_t* get_cpu_rand_states() const; /*! 
\brief get pointer to initialized and seeded random number states located on GPU */ /* Access each state by states[id], but this id should be <= MX_NUM_GPU_RANDOM_STATES */ /* Note that if you are using cpu build, it will return a nullptr */ - mx_gpu_rand_t* get_gpu_rand_states() const { + inline mx_gpu_rand_t* get_gpu_rand_states() const { return static_cast(rand_gpu_states); } @@ -582,15 +478,7 @@ class OpResource { * getShapeAt("[[1]]", 0) returns "[1]" * getShapeAt("[[1],[2,3]]", 1) returns "[2,3]" */ -std::string getShapeAt(const std::string& shape, unsigned index) { - int idx = 1; // start at 1 to skip the first square bracket [ - // find the beginning of the output shape for the particular output index - for (unsigned x=0; x < index; x++) - idx = shape.find("[", idx+1); - int stop = shape.find("]", idx); // find stop index for this output shape - // add this shape to the list - return shape.substr(idx, stop-idx+1); -} +std::string getShapeAt(const std::string& shape, unsigned index); /* \brief get dtype value from list of dtypes string * @@ -599,15 +487,7 @@ std::string getShapeAt(const std::string& shape, unsigned index) { * getDtypeAt("[1]", 0) returns "1" * getDtypeAt("[1,2]", 1) returns "2" */ -std::string getDtypeAt(const std::string& dtype, unsigned index) { - // find the beginning of the output dtype for the particular output index - int idx = 0; - for (unsigned x=0; x < index; x++) - idx = dtype.find(",", idx+1); - int stop = dtype.find(",", idx+1); // find stop index for this output dtype - if (stop == -1) stop = dtype.find("]", idx+1); - return dtype.substr(idx+1, stop-idx-1); -} +std::string getDtypeAt(const std::string& dtype, unsigned index); /*! * \brief Json utility to parse serialized subgraph symbol @@ -617,196 +497,41 @@ enum JsonType {ERR, STR, NUM, LIST, MAP}; /*! 
\brief definition of JSON objects */ struct JsonVal { - JsonVal() : type(ERR), num(-1), str("") {} // default constructor + JsonVal(); // default constructor // construct a JSON object by type - explicit JsonVal(JsonType t) : type(t), num(-1), str("") {} + explicit JsonVal(JsonType t); // construct a string JSON object - explicit JsonVal(std::string s) : type(STR), num(-1), str(s) {} + explicit JsonVal(std::string s); // construct a number JSON object - explicit JsonVal(int n) : type(NUM), num(n), str(std::to_string(n)) {} + explicit JsonVal(int n); // complex constructor - JsonVal(JsonType t, int n, std::string s) : type(t), num(n), str(s) {} - bool operator<(const JsonVal &o) const { - // for string JSON objects compare the string - if (type == STR) return type == o.type && str < o.str; - // for number JSON objects compare the number - if (type == NUM) return type == o.type && num < o.num; - // for list JSON objects, compare the size of list, and then each object in the list - if (type == LIST) { - if (list.size() != o.list.size()) return false; - for (unsigned int i=0; i< list.size(); i++) - if (list[i] < o.list[i]) - return false; // if we find an object that doesnt match return - return true; // all objects in lists matched - } - // for map JSON objects, compare the size of map, and then each key/value in the maps - if (type == MAP) { - if (map.size() != o.map.size()) return false; - for (auto &item : map) { - // if one map is missing a key in another return - if (o.map.find(item.first) == o.map.end()) return false; - if (item.second < o.map.at(item.first)) return false; - } - return true; - } - return type < o.type; - } + JsonVal(JsonType t, int n, std::string s); + bool operator<(const JsonVal &o) const; // convert JSON object back to JSON-compatible string - std::string dump() const { - std::string ret; - switch (type) { - case ERR: - ret = "json(Error)"; - break; - case STR: - ret = "\"" + str + "\""; - break; - case NUM: - ret = str; - break; - case LIST: 
- ret = "["; - for (unsigned i=0; i < list.size(); i++) { - auto &item = list[i]; - ret += item.dump(); - if (i < list.size()-1) - ret += ","; - } - ret += "]"; - break; - case MAP: - ret = "{"; - unsigned cnt = 0; - for (auto &item : map) { - ret += item.first.dump() + " : " + item.second.dump(); - if (cnt++ < map.size()-1) - ret += ","; - } - ret += "}"; - break; - } - return ret; - } + std::string dump() const; + // convert JSON-compatible string to JSON object - static JsonVal parse(const std::string& json) { - unsigned int idx = 0; - return JsonVal::parse(json, &idx); - } + static JsonVal parse(const std::string& json); + // parse a string JSON object - static JsonVal parse_string(const std::string& json, unsigned int* idx) { - JsonVal ret(STR); - while (*idx < json.size()) { - if (json[*idx] == '"') { - ++(*idx); - return ret; - } else { - ret.str += json[*idx]; - ++(*idx); - } - } - std::cout << "Error! Unable to parse string" << std::endl; - return JsonVal(); - } + static JsonVal parse_string(const std::string& json, unsigned int* idx); + // parse a number JSON object - static JsonVal parse_num(const std::string& json, unsigned int* idx) { - JsonVal ret(NUM); - while (*idx < json.size()) { - if (json[*idx] >= '0' && json[*idx] <= '9') { - ret.str += json[*idx]; - ++(*idx); - } else { - break; - } - } - ret.num = std::stoi(ret.str); - return ret; - } + static JsonVal parse_num(const std::string& json, unsigned int* idx); + // parse a list of JSON objects - static JsonVal parse_list(const std::string& json, unsigned int* idx) { - JsonVal ret(LIST); - while (*idx < json.size()) { - if (json[*idx] == ']') { - ++(*idx); - return ret; - } else { - JsonVal item = JsonVal::parse(json, idx); - if (item.type != ERR) - ret.list.push_back(item); - } - } - std::cout << "Error! 
Unable to parse list" << std::endl; - return JsonVal(); - } + static JsonVal parse_list(const std::string& json, unsigned int* idx); + // parse a map of JSON objects - static JsonVal parse_map(const std::string& json, unsigned int* idx) { - JsonVal ret(MAP), key; - while (*idx < json.size()) { - if (json[*idx] == '}') { - ++(*idx); - return ret; - } else { - JsonVal item = JsonVal::parse(json, idx); - if (key.type == ERR) { - key = item; - } else { - ret.map[key] = item; - key.type = ERR; - } - } - } - std::cout << "Error! Unable to parse map" << std::endl; - return JsonVal(); - } + static JsonVal parse_map(const std::string& json, unsigned int* idx); + // generic parse function - static JsonVal parse(const std::string& json, unsigned int *idx) { - JsonVal ret; - while (*idx < json.size()) { - if (json[*idx] == '"') { - ++(*idx); - ret = JsonVal::parse_string(json, idx); - } else if (json[*idx] >= '0' && json[*idx] <= '9') { - ret = JsonVal::parse_num(json, idx); - } else if (json[*idx] == '[') { - ++(*idx); - ret = JsonVal::parse_list(json, idx); - } else if (json[*idx] == '{') { - ++(*idx); - ret = JsonVal::parse_map(json, idx); - } else if (json[*idx] == ']' || json[*idx] == '}') {return ret;} - if (ret.type != ERR) return ret; - ++(*idx); - } - return ret; - } + static JsonVal parse(const std::string& json, unsigned int *idx); + // debug function to convert data structure to a debugstring - std::string toString() const { - std::string ret; - switch (type) { - case ERR: - ret = "json(Error)"; - break; - case STR: - ret = "json(STR:" + str + ")"; - break; - case NUM: - ret = "json(INT:" + str + ")"; - break; - case LIST: - ret = "json(LIST:["; - for (auto &item : list) - ret += item.toString() + ","; - ret += "])"; - break; - case MAP: - ret = "json(MAP:{"; - for (auto &item : map) - ret += item.first.toString() + " : " + item.second.toString() + ","; - ret += "})"; - break; - } - return ret; - } + std::string toString() const; + JsonType type; int num; 
std::string str; @@ -829,25 +554,19 @@ struct NodeEntry { // Representation of a node in the graph class Node { public: - Node() {tensor = nullptr;} + Node(); + // internally set passResource to enable tensor allocation for graph passes - void _setPassResource(PassResource* res_) {res = res_;} + void _setPassResource(PassResource* res_); + /* \brief allocate an arg tensor for this node */ void alloc_arg(const std::vector& shapes, - const MXContext &ctx, MXDType dtype) { - if (!res) - throw std::runtime_error( - "Node not initialized. Cannot use alloc_arg outside of graph passes."); - tensor = res->alloc_arg(name, shapes, ctx, dtype); - } + const MXContext &ctx, MXDType dtype); + /* \brief allocate an aux tensor for this node */ void alloc_aux(const std::vector& shapes, - const MXContext &ctx, MXDType dtype) { - if (!res) - throw std::runtime_error( - "Node not initialized. Cannot use alloc_aux outside of graph passes."); - tensor = res->alloc_aux(name, shapes, ctx, dtype); - } + const MXContext &ctx, MXDType dtype); + std::string op; // operator name (ie. Convolution) std::string name; // unique node name (ie. 
conv_0 or conv_1) MXTensor* tensor; // tensor data for input nodes @@ -863,298 +582,57 @@ class Node { // Representation of the graph class Graph { public: - Graph() : res(nullptr) {} + Graph(); + /* \brief deleted nodes when deleting the graph */ - ~Graph() { - for (int i = 0; i < nodes.size(); i++) - delete nodes[i]; - } + ~Graph(); /* \brief create a graph object from an unparsed string */ - static Graph* fromString(const std::string& json) { - JsonVal val = JsonVal::parse(json); - return fromJson(val); - } + static Graph* fromString(const std::string& json); /* \brief create a graph object from a parsed JSON object */ - static Graph* fromJson(JsonVal val) { - // get nodes list - JsonVal nodes = val.map[JsonVal("nodes")]; - Graph *g = new Graph(); - - std::map nodeMap; - // loop over nodes - for (int i = 0; i < nodes.list.size(); i++) { - Node* n = new Node(); - g->nodes.push_back(n); - JsonVal node = nodes.list[i]; - - // set the op info - n->op = node.map[JsonVal("op")].str; - n->name = node.map[JsonVal("name")].str; - - // if op is null it is an input to the graph - if (n->op.compare("null") == 0) - g->inputs.push_back(n); - - // set attrs - JsonVal attributes = node.map[JsonVal("attrs")]; - for (auto& kv : attributes.map) { - n->attrs[kv.first.str] = kv.second.str; - } - - // set subgraphs, parsing each into a graph - if (node.map.count(JsonVal("subgraphs")) > 0) { - JsonVal subgraphs = node.map[JsonVal("subgraphs")]; - for (auto &subgraph : subgraphs.list) { - n->subgraphs.push_back(fromJson(subgraph)); - } - } - - // set node inputs - JsonVal node_inputs = node.map[JsonVal("inputs")]; - n->inputs.resize(node_inputs.list.size()); - for (int j = 0; j < node_inputs.list.size(); j++) { - JsonVal input = node_inputs.list[j]; - NodeEntry& entry = n->inputs[j]; - // get pointer to other node - entry.node = nodeMap[input.list[0].num]; - // get the other node's output index - entry.entry = input.list[1].num; - // set other nodes output as connected to this node - 
entry.node->outputs.push_back({n, j}); - } - nodeMap[i] = n; - } - - // set graph level outputs - JsonVal& heads = val.map[JsonVal("heads")]; - g->outputs.resize(heads.list.size()); - for (int i = 0; i < heads.list.size(); i++) { - JsonVal head = heads.list[i]; - g->outputs[i].node = nodeMap[head.list[0].num]; - g->outputs[i].entry = head.list[1].num; - } - - // add all attributes to the graph - for (auto& kv : val.map) { - if (kv.first.str.compare("nodes") != 0 && - kv.first.str.compare("heads") != 0 && - kv.first.str.compare("node_row_ptr") != 0 && - kv.first.str.compare("arg_nodes") != 0) { - g->attrs[kv.first.str] = kv.second; - } - } - return g; - } + static Graph* fromJson(JsonVal val); /* \brief convert graph object back to JSON object */ - JsonVal toJson() { - // top level object is a map - JsonVal val(MAP); - - // add attributes - for (auto& kv : attrs) { - val.map[JsonVal(kv.first)] = kv.second; - } - - // sort graph nodes in topological order, create mapping of node to index - std::map nodeMap; - std::vector sorted = topological_sort(); - // nodes are in reverse topological order in the vector (back is first) - // so loop from end to front over the vector 'sorted' - for (int i = sorted.size()-1; i >= 0; i--) { - nodeMap[sorted[i]] = sorted.size()-1-i; - } - - // create node_row_ptr entry - val.map[JsonVal("node_row_ptr")] = JsonVal(LIST); - JsonVal& node_row_ptr = val.map[JsonVal("node_row_ptr")]; - for (int i = 0; i < nodes.size(); i++) - node_row_ptr.list.push_back(JsonVal(i)); - - // add all input nodes - val.map[JsonVal("arg_nodes")] = JsonVal(LIST); - JsonVal& arg_nodes = val.map[JsonVal("arg_nodes")]; - for (int i = 0; i < inputs.size(); i++) - arg_nodes.list.push_back(JsonVal(nodeMap[inputs[i]])); - - // add all output nodes - val.map[JsonVal("heads")] = JsonVal(LIST); - JsonVal& heads = val.map[JsonVal("heads")]; - for (int i = 0; i < outputs.size(); i++) { - heads.list.push_back(JsonVal(LIST)); - JsonVal& out = heads.list[i]; - 
out.list.push_back(JsonVal(nodeMap[outputs[i].node])); - out.list.push_back(JsonVal(outputs[i].entry)); - out.list.push_back(JsonVal(0)); - } - - // add all graph nodes - val.map[JsonVal("nodes")] = JsonVal(LIST); - JsonVal& nodes_ = val.map[JsonVal("nodes")]; - for (int i = sorted.size()-1; i >= 0; i--) { - // each node is a map - nodes_.list.push_back(JsonVal(MAP)); - Node* n = sorted[i]; - JsonVal& n_ = nodes_.list[nodes_.list.size()-1]; - - n_.map[JsonVal("op")] = JsonVal(n->op); - n_.map[JsonVal("name")] = JsonVal(n->name); - n_.map[JsonVal("inputs")] = JsonVal(LIST); - - // add inputs for this node - JsonVal& inputs_ = n_.map[JsonVal("inputs")]; - for (int j = 0; j < n->inputs.size(); j++) { - inputs_.list.push_back(JsonVal(LIST)); - NodeEntry& entry = n->inputs[j]; - JsonVal& in = inputs_.list[j]; - in.list.push_back(JsonVal(nodeMap[entry.node])); - in.list.push_back(JsonVal(entry.entry)); - in.list.push_back(JsonVal(0)); - } - - // add subgraphs for this node, convert each back to JSON - if (n->subgraphs.size() > 0) { - n_.map[JsonVal("subgraphs")] = JsonVal(LIST); - JsonVal &subgraphs_ = n_.map[JsonVal("subgraphs")]; - for (Graph *subgraph : n->subgraphs) { - subgraphs_.list.push_back(subgraph->toJson()); - } - } - - // add attributes for this node - n_.map[JsonVal("attrs")] = JsonVal(MAP); - JsonVal& attrs_ = n_.map[JsonVal("attrs")]; - for (auto& kv : n->attrs) { - attrs_.map[JsonVal(kv.first)] = JsonVal(kv.second); - } - } - return val; - } + JsonVal toJson(); /* \brief convert graph object to JSON string */ - std::string toString() { - return toJson().dump(); - } + std::string toString(); /* \brief visits a node "n" */ void _dfs_util(Node* n, std::unordered_set* to_visit, - std::function handler) const { - to_visit->erase(n); // remove node now that we're visiting it - for (NodeEntry& e : n->outputs) { - Node* o = e.node; - if (to_visit->count(o) != 0) { - _dfs_util(o, to_visit, handler); // visit neighbor - } - } - handler(n); // post-order visit this 
node - } + std::function handler) const; /* \brief post-order DFS graph traversal */ - void DFS(std::function handler) const { - std::unordered_set to_visit; - // put all nodes in set to visit - for (auto& n : nodes) - to_visit.insert(n); - // visit all inputs first - for (auto& i : inputs) - if (to_visit.count(i) != 0) - _dfs_util(i, &to_visit, handler); - // visit any nodes left - while (to_visit.size() > 0) - _dfs_util(*(to_visit.begin()), &to_visit, handler); - } + void DFS(std::function handler) const; /* \brief sort graph nodes in topological order */ - std::vector topological_sort() const { - std::vector sorted; - auto handler = [&](Node* n) { - sorted.push_back(n); // when visiting each node, add it in order to the vector - }; - DFS(handler); - return sorted; - } + std::vector topological_sort() const; /* \brief print out graph details */ - void print(int indent = 0) const { - std::string space = ""; - for (int i = 0; i < indent; i++) space+=" "; - - std::cout << space << "########### Graph #############" << std::endl; - std::cout << space << "attributes: " << std::endl; - for (auto &kv : attrs) - std::cout << space << "\t" << kv.first << " : " << kv.second.str << std::endl; - std::cout << space << "inputs: " << inputs.size() << std::endl; - std::cout << space << "outputs: " << outputs.size() << std::endl; - std::cout << space << "nodes: " << nodes.size() << std::endl; - std::vector sorted = topological_sort(); - // loop over each node and print out its inputs/outputs - for (int i = sorted.size()-1; i >= 0; i--) { - std::cout << space << "Node: " << sorted[i]->name << std::endl; - for (int j = 0; j < sorted[i]->inputs.size(); j++) { - std::cout << space << "\tInput: " << sorted[i]->inputs[j].node->name << " " - << sorted[i]->inputs[j].entry << std::endl; - } - for (int j = 0; j < sorted[i]->outputs.size(); j++) { - std::cout << space << "\tOutput: " << sorted[i]->outputs[j].node->name << " " - << sorted[i]->outputs[j].entry << std::endl; - } - if 
(sorted[i]->subgraphs.size() > 0) { - for (auto &subgraph : sorted[i]->subgraphs) { - std::cout << space << "\tSubgraph:" << std::endl; - subgraph->print(indent+2); - } - } - } - std::cout << space << "###############################" << std::endl; - } + void print(int indent = 0) const; /* \brief add a new node to this graph */ - Node* addNode(const std::string& name, const std::string& op) { - Node* n = new Node(); - n->name = name; - n->op = op; - if (res) - n->_setPassResource(res); - return n; - } + Node* addNode(const std::string& name, const std::string& op); + /* \brief get node at index in graph */ - Node* getNode(size_t idx) { - return nodes[idx]; - } + Node* getNode(size_t idx); + /* \brief get const node at index in const graph */ - const Node* getNode(size_t idx) const { - return nodes.at(idx); - } + const Node* getNode(size_t idx) const; + /* \brief get attribute on graph */ - const JsonVal& getAttr(const std::string& key) const { - return attrs.at(key); - } + const JsonVal& getAttr(const std::string& key) const; + /* \brief get number of nodes in the graph */ - size_t size() const { - return nodes.size(); - } + size_t size() const; + // internally set passResource to enable tensor allocation for graph passes - void _setPassResource(PassResource* res_) {res = res_;} + void _setPassResource(PassResource* res_); + // internally set arg/aux params when available void _setParams(std::unordered_map* args, - std::unordered_map* aux) { - // set params for each input node - for (Node* node : inputs) { - if (args->count(node->name) > 0) - node->tensor = &args->at(node->name); - else if (aux->count(node->name) > 0) - node->tensor = &aux->at(node->name); - } - - if (res) { - // set passResource for each node - for (Node* node : nodes) { - node->_setPassResource(res); - } - } - } + std::unordered_map* aux); std::vector inputs; std::vector outputs; @@ -1214,7 +692,7 @@ class CustomStatefulOp { virtual MXReturnValue Backward(std::vector* inputs, std::vector* 
outputs, const OpResource& op_res) { - std::cout << "Error! Operator does not support backward" << std::endl; + MX_ERROR_MSG << "Error! Operator does not support backward" << std::endl; return MX_FAIL; } }; @@ -1230,30 +708,30 @@ class CustomStatefulOpWrapper { /*! \brief Custom Operator function templates */ typedef MXReturnValue (*fcomp_t)(const std::unordered_map& attributes, + std::string>& attributes, std::vector* inputs, std::vector* outputs, const OpResource& res); typedef MXReturnValue (*parseAttrs_t)(const std::unordered_map& attributes, + std::string>& attributes, int* num_inputs, int* num_outputs); typedef MXReturnValue (*inferType_t)(const std::unordered_map& attributes, + std::string>& attributes, std::vector* in_types, std::vector* out_types); typedef MXReturnValue (*inferSType_t)(const std::unordered_map& attributes, + std::string>& attributes, std::vector* in_storage_types, std::vector* out_storage_types); typedef MXReturnValue (*inferShape_t)(const std::unordered_map& attributes, + std::string>& attributes, std::vector >* in_shapes, std::vector >* out_shapes); typedef MXReturnValue (*mutateInputs_t)(const std::unordered_map& attributes, + std::string>& attributes, std::vector* input_indices); typedef MXReturnValue (*createOpState_t)(const std::unordered_map& attributes, + std::string>& attributes, CustomStatefulOp**); /*! 
@@ -1261,66 +739,27 @@ typedef MXReturnValue (*createOpState_t)(const std::unordered_map 0) - raiseDuplicateContextError(); - forward_ctx_map[ctx] = fcomp; - return *this; - } - CustomOp& setBackward(fcomp_t fgrad, const char* ctx) { - if (backward_ctx_map.count(ctx) > 0) - raiseDuplicateContextError(); - backward_ctx_map[ctx] = fgrad; - return *this; - } - CustomOp& setParseAttrs(parseAttrs_t func) { - parse_attrs = func; - return *this; - } - CustomOp& setInferType(inferType_t func) { - infer_type = func; - return *this; - } - CustomOp& setInferSType(inferSType_t func) { - infer_storage_type = func; - return *this; - } - CustomOp& setInferShape(inferShape_t func) { - infer_shape = func; - return *this; - } - CustomOp& setMutateInputs(mutateInputs_t func) { - mutate_inputs = func; - return *this; - } - CustomOp& setCreateOpState(createOpState_t func, const char* ctx) { - if (create_op_ctx_map.count(ctx) > 0) - raiseDuplicateContextError(); - create_op_ctx_map[ctx] = func; - return *this; - } - CustomOp& setIsSubgraphOp() { - isSGop = true; - return *this; - } - void mapToVector() { - for (auto kv : forward_ctx_map) { - forward_ctx_cstr.push_back(kv.first); - forward_fp.push_back(kv.second); - } - for (auto kv : backward_ctx_map) { - backward_ctx_cstr.push_back(kv.first); - backward_fp.push_back(kv.second); - } - for (auto kv : create_op_ctx_map) { - create_op_ctx_cstr.push_back(kv.first); - create_op_fp.push_back(kv.second); - } - } - ~CustomOp() {} + explicit CustomOp(const char* op_name); + + CustomOp& setForward(fcomp_t fcomp, const char* ctx); + + CustomOp& setBackward(fcomp_t fgrad, const char* ctx); + + CustomOp& setParseAttrs(parseAttrs_t func); + + CustomOp& setInferType(inferType_t func); + + CustomOp& setInferSType(inferSType_t func); + + CustomOp& setInferShape(inferShape_t func); + + CustomOp& setMutateInputs(mutateInputs_t func); + + CustomOp& setCreateOpState(createOpState_t func, const char* ctx); + + CustomOp& setIsSubgraphOp(); + + void 
mapToVector(); /*! \brief operator name */ const char* name; @@ -1339,12 +778,7 @@ class CustomOp { std::vector create_op_fp; private: - void raiseDuplicateContextError() { - std::string op_name_str(name); - throw std::runtime_error( - "Error! Error! Cannot register multiple functions under same context for operator '" - + op_name_str + "'"); - } + void raiseDuplicateContextError(); /*! \brief dedup context maps - static string ctx to custom function */ std::unordered_map forward_ctx_map, backward_ctx_map; @@ -1360,13 +794,10 @@ typedef MXReturnValue (*graphPass_t)(mxnet::ext::Graph* graph, */ class CustomPass { public: - CustomPass() : name("ERROR") {} - explicit CustomPass(const char* pass_name) - : name(pass_name) {} - CustomPass& setBody(graphPass_t fn) { - pass = fn; - return *this; - } + CustomPass(); + explicit CustomPass(const char* pass_name); + + CustomPass& setBody(graphPass_t fn); /*! \brief pass name */ const char* name; @@ -1392,48 +823,24 @@ typedef MXReturnValue (*reviewSubgraph_t)(const mxnet::ext::Graph *subgraph, int */ class CustomPartitioner { public: - CustomPartitioner() : name("ERROR") {} - explicit CustomPartitioner(const char* backend_name) : - name(backend_name) {} + CustomPartitioner(); + + explicit CustomPartitioner(const char* backend_name); + CustomPartitioner& addStrategy(const char* prop_name, - const char* sg_name) { - strategies.push_back(prop_name); - op_names.push_back(sg_name); - return *this; - } - CustomPartitioner& setSupportedOps(const char* prop_name, supportedOps_t fn) { - supported_map[std::string(prop_name)] = fn; - return *this; - } - CustomPartitioner& setCreateSelector(const char* prop_name, createSelector_t fn) { - selector_map[std::string(prop_name)] = fn; - return *this; - } - CustomPartitioner& setReviewSubgraph(const char* prop_name, reviewSubgraph_t fn) { - review_map[std::string(prop_name)] = fn; - return *this; - } - supportedOps_t getSupportedOps(int stg_id) { - std::string prop(strategies[stg_id]); - if 
(supported_map.count(prop) > 0) - return supported_map[prop]; - else - return nullptr; - } - createSelector_t getCreateSelector(int stg_id) { - std::string prop(strategies[stg_id]); - if (selector_map.count(prop) > 0) - return selector_map[prop]; - else - return nullptr; - } - reviewSubgraph_t getReviewSubgraph(int stg_id) { - std::string prop(strategies[stg_id]); - if (review_map.count(prop) > 0) - return review_map[prop]; - else - return nullptr; - } + const char* sg_name); + + CustomPartitioner& setSupportedOps(const char* prop_name, supportedOps_t fn); + + CustomPartitioner& setCreateSelector(const char* prop_name, createSelector_t fn); + + CustomPartitioner& setReviewSubgraph(const char* prop_name, reviewSubgraph_t fn); + + supportedOps_t getSupportedOps(int stg_id); + + createSelector_t getCreateSelector(int stg_id); + + reviewSubgraph_t getReviewSubgraph(int stg_id); /*! \brief partitioner name */ const char* name; @@ -1520,44 +927,6 @@ class Registry { MX_STR_CONCAT(MX_REGISTER_PASS_DEF_(Name), __COUNTER__) = \ Registry::get()->add(MX_TOSTRING(Name)) -/* \brief Class to store error messages from extensions to pass to MXNet */ -class MXerrorMsgs { - public: - /*! - * \brief get singleton pointer to class - * \returns pointer to class - */ - static MXerrorMsgs* get() { - static MXerrorMsgs inst; - return &inst; - } - /*! - * \brief add a new error message - */ - std::stringstream& add(const char* file, int line) { - messages.push_back(std::stringstream()); - messages.back() << file << "[" << line << "]: "; - return messages.back(); - } - int size() { - return messages.size(); - } - const std::string* get(int idx) { - return new std::string(messages.at(idx).str()); - } - - private: - /*! \brief constructor */ - MXerrorMsgs() {} - /*! \brief destructor */ - ~MXerrorMsgs() {} - /*! 
\brief map of entries in registry */ - std::vector messages; -}; - -// Add a new error message, example: MX_ERROR_MSG << "my error msg"; -#define MX_ERROR_MSG MXerrorMsgs::get()->add(__FILE__, __LINE__) - /* -------------- BELOW ARE CTYPE FUNCTIONS PROTOTYPES --------------- */ /*! @@ -1756,14 +1125,10 @@ typedef int (*msgGet_t)(int idx, const char** msg); extern "C" { /*! \brief returns MXNet library version */ - MX_INT_RET _opVersion() { - return MX_LIBRARY_VERSION; - } + MX_INT_RET _opVersion(); /*! \brief returns number of ops registered in this library */ - MX_INT_RET _opRegSize() { - return mxnet::ext::Registry::get()->size(); - } + MX_INT_RET _opRegSize(); /*! \brief returns operator registration at specified index */ MX_VOID_RET _opRegGet(int idx, const char** name, int *isSGop, @@ -1773,170 +1138,32 @@ extern "C" { const char*** create_op_ctx, mxnet::ext::createOpState_t** create_op_fp, int* create_op_count, mxnet::ext::parseAttrs_t* parse, mxnet::ext::inferType_t* type, mxnet::ext::inferSType_t* stype, - mxnet::ext::inferShape_t* shape, mxnet::ext::mutateInputs_t* mutate) { - mxnet::ext::CustomOp &op = mxnet::ext::Registry::get()->get(idx); - *name = op.name; - *parse = op.parse_attrs; - *type = op.infer_type; - *stype = op.infer_storage_type; - *shape = op.infer_shape; - *mutate = op.mutate_inputs; - *isSGop = op.isSGop; - op.mapToVector(); - *forward_ctx = op.forward_ctx_cstr.data(); - *forward_fp = op.forward_fp.data(); - *forward_count = op.forward_fp.size(); - *backward_ctx = op.backward_ctx_cstr.data(); - *backward_fp = op.backward_fp.data(); - *backward_count = op.backward_fp.size(); - *create_op_ctx = op.create_op_ctx_cstr.data(); - *create_op_fp = op.create_op_fp.data(); - *create_op_count = op.create_op_fp.size(); - } + mxnet::ext::inferShape_t* shape, mxnet::ext::mutateInputs_t* mutate); /*! 
\brief calls free from the external library for library allocated arrays */ - MX_VOID_RET _opCallFree(void* ptr) { - free(ptr); - } + MX_VOID_RET _opCallFree(void* ptr); /*! \brief returns status of calling parse attributes function for operator from library */ MX_INT_RET _opCallParseAttrs(mxnet::ext::parseAttrs_t parseAttrs, const char* const* keys, const char* const* vals, int num, - int* num_in, int* num_out) { - // create map of attributes from list - std::unordered_map attrs; - for (int i = 0; i < num; i++) { - attrs[std::string(keys[i])] = std::string(vals[i]); - } - - return parseAttrs(attrs, num_in, num_out); - } + int* num_in, int* num_out); /*! \brief returns status of calling inferShape function for operator from library */ MX_INT_RET _opCallInferShape(mxnet::ext::inferShape_t inferShape, const char* const* keys, const char* const* vals, int num, unsigned int** inshapes, int* indims, int num_in, unsigned int*** mod_inshapes, int** mod_indims, - unsigned int*** outshapes, int** outdims, int num_out) { - // create map of attributes from list - std::unordered_map attrs; - for (int i = 0; i < num; i++) { - attrs[std::string(keys[i])] = std::string(vals[i]); - } - - // create a vector of shapes for inputs - std::vector > in_shapes(num_in); - for (int i = 0; i < num_in; i++) { - for (int j = 0; j < indims[i]; j++) { - in_shapes[i].push_back(inshapes[i][j]); - } - } - - // create a vector of shapes for outputs - std::vector > out_shapes(num_out); - - int retval = inferShape(attrs, &in_shapes, &out_shapes); - if (!retval) return retval; - - // allocate space for modified input dims, shape - *mod_indims = static_cast(malloc (num_in * sizeof(int))); - *mod_inshapes = static_cast(malloc (num_in * sizeof(unsigned*))); - - // copy modified input shapes - for (int i = 0; i < num_in; i++) { - (*mod_indims)[i] = in_shapes[i].size(); - (*mod_inshapes)[i] = static_cast(malloc ((*mod_indims)[i] * sizeof(unsigned))); - for (int j = 0; j < (*mod_indims)[i]; j++) { - 
(*mod_inshapes)[i][j] = in_shapes[i][j]; - } - } - - // allocate space for output dims, shape - *outdims = static_cast(malloc (num_out * sizeof(int))); - *outshapes = static_cast(malloc (num_out * sizeof(unsigned*))); - - // copy output shapes - for (int i = 0; i < num_out; i++) { - (*outdims)[i] = out_shapes[i].size(); - (*outshapes)[i] = static_cast(malloc ((*outdims)[i] * sizeof(unsigned))); - for (int j = 0; j < (*outdims)[i]; j++) { - (*outshapes)[i][j] = out_shapes[i][j]; - } - } - - return retval; - } + unsigned int*** outshapes, int** outdims, int num_out); /*! \brief returns status of calling inferType function for operator from library */ MX_INT_RET _opCallInferType(mxnet::ext::inferType_t inferType, const char* const* keys, const char* const* vals, int num, - int* intypes, int num_in, int* outtypes, int num_out) { - // create map of attributes from list - std::unordered_map attrs; - for (int i = 0; i < num; i++) { - attrs[std::string(keys[i])] = std::string(vals[i]); - } - - // create a vector of types for inputs - std::vector in_types(num_in); - for (int i = 0; i < num_in; i++) { - in_types[i] = intypes[i]; - } - - // create a vector of types for outputs - std::vector out_types(num_out, -1); - - int retval = inferType(attrs, &in_types, &out_types); - if (!retval) - return retval; - - // copy modified input types - for (int i = 0; i < num_in; i++) { - intypes[i] = in_types[i]; - } - // copy output types - for (int i = 0; i < num_out; i++) { - outtypes[i] = out_types[i]; - } - - return retval; - } + int* intypes, int num_in, int* outtypes, int num_out); /*! 
\brief returns status of calling inferSType function for operator from library */ MX_INT_RET _opCallInferSType(mxnet::ext::inferSType_t inferSType, const char* const* keys, const char* const* vals, int num, - int* instypes, int num_in, int* outstypes, int num_out) { - // create map of attributes from list - std::unordered_map attrs; - for (int i = 0; i < num; i++) { - attrs[std::string(keys[i])] = std::string(vals[i]); - } - - // create a vector of types for inputs - std::vector in_stypes(num_in); - for (int i = 0; i < num_in; i++) { - in_stypes[i] = instypes[i]; - } - - // create a vector of types for outputs - std::vector out_stypes(num_out, -1); - - int retval = inferSType(attrs, &in_stypes, &out_stypes); - - if (!retval) - return retval; - - // copy modified input storage types - for (int i = 0; i < num_in; i++) { - instypes[i] = in_stypes[i]; - } - // copy output storage types - for (int i = 0; i < num_out; i++) { - outstypes[i] = out_stypes[i]; - } - - return retval; - } + int* instypes, int num_in, int* outstypes, int num_out); /*! \brief returns status of calling Forward/Backward function for operator from library */ MX_INT_RET _opCallFCompute(mxnet::ext::fcomp_t fcomp, const char* const* keys, @@ -1954,119 +1181,17 @@ extern "C" { void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes, - void* rng_cpu_states, void* rng_gpu_states) { - // create map of attributes from list - std::unordered_map attrs; - for (int i = 0; i < num; i++) { - attrs[std::string(keys[i])] = std::string(vals[i]); - } - - // create a vector of tensors for inputs - std::vector inputs(num_in); - // create a vector for sparse inputs - std::vector in_sparse(num_in); - - for (int i = 0; i < num_in; i++) { - // Dense representation. 
- if (instypes[i] == 0) { - inputs[i].setTensor(indata[i], (mxnet::ext::MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], mxnet::ext::MXContext(indev_type[i], indev_id[i]), - mxnet::ext::kDefaultStorage); - } else { - // Sparse representation. - mxnet::ext::MXStorageType type; - if (instypes[i] == 1) { - type = mxnet::ext::kRowSparseStorage; - in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); - } else { - type = mxnet::ext::kCSRStorage; - in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], - in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); - } - inputs[i].setTensor(reinterpret_cast(&in_sparse[i]), (mxnet::ext::MXDType)intypes[i], - inshapes[i], indims[i], inIDs[i], - mxnet::ext::MXContext(indev_type[i], indev_id[i]), type); - } - } - - // create a vector of tensors for outputs - std::vector outputs(num_out); - std::vector out_sparse(num_out); - - for (int i = 0; i < num_out; i++) { - // Dense representation. - if (outstypes[i] == 0) { - outputs[i].setTensor(outdata[i], (mxnet::ext::MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), - mxnet::ext::kDefaultStorage); - } else { - // Sparse representation. 
- mxnet::ext::MXStorageType type; - if (outstypes[i] == 1) { - type = mxnet::ext::kRowSparseStorage; - out_sparse[i].set(outdata[i], outshapes[i], outdims[i], - out_indices[i], out_indices_shapes[i]); - } else { - type = mxnet::ext::kCSRStorage; - out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], - out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); - } - outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), - (mxnet::ext::MXDType)outtypes[i], - outshapes[i], outdims[i], outIDs[i], - mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), type); - } - } - - mxnet::ext::OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, - cuda_stream, sparse_malloc, sparse_alloc, - rng_cpu_states, rng_gpu_states); - return fcomp(attrs, &inputs, &outputs, res); - } + void* rng_cpu_states, void* rng_gpu_states); /*! \brief returns status of calling mutateInputs function for operator from library */ MX_INT_RET _opCallMutateInputs(mxnet::ext::mutateInputs_t mutate, const char* const* keys, const char* const* vals, int num, - int** mutate_indices, int* indices_size) { - // create map of attributes from list - std::unordered_map attrs; - for (int i = 0; i < num; i++) { - attrs[std::string(keys[i])] = std::string(vals[i]); - } - - // create a vector of mutate input indices - std::vector mut_ind; - - int retval = mutate(attrs, &mut_ind); - if (!retval) - return retval; - - // output the input indices - *indices_size = mut_ind.size(); - *mutate_indices = static_cast(malloc (*indices_size * sizeof(int))); - for (int i = 0; i < *indices_size; i++) { - (*mutate_indices)[i] = mut_ind[i]; - } - - return retval; - } + int** mutate_indices, int* indices_size); /*! 
\brief returns status of calling createStatefulOp function for operator from library */ MX_INT_RET _opCallCreateOpState(mxnet::ext::createOpState_t create_op, const char* const* keys, const char* const* vals, int num, - void** state_op) { - // create map of attributes from list - std::unordered_map attrs; - for (int i = 0; i < num; i++) { - attrs[std::string(keys[i])] = std::string(vals[i]); - } - - // void pointer to hold custom state op instance created in custom library - // eventually state_op pointer is populated by instance from custom library - mxnet::ext::CustomStatefulOp** op_ptr = - reinterpret_cast(state_op); - return create_op(attrs, op_ptr); - } + void** state_op); /*! \brief returns status of calling Stateful Forward/Backward for operator from library */ MX_INT_RET _opCallFStatefulCompute(int is_forward, void* state_op, const int64_t** inshapes, @@ -2084,194 +1209,48 @@ extern "C" { void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes, - void* rng_cpu_states, void* rng_gpu_states) { - // create a vector of tensors for inputs - std::vector inputs(num_in); - // create a vector for sparse inputs - std::vector in_sparse(num_in); - - for (int i = 0; i < num_in; i++) { - if (instypes[i] == 0) { - // Dense representation. - inputs[i].setTensor(indata[i], (mxnet::ext::MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], mxnet::ext::MXContext(indev_type[i], indev_id[i]), - mxnet::ext::kDefaultStorage); - } else { - // Sparse representation. 
- mxnet::ext::MXStorageType type; - if (instypes[i] == 1) { - type = mxnet::ext::kRowSparseStorage; - in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); - } else { - type = mxnet::ext::kCSRStorage; - in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], - in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); - } - inputs[i].setTensor(reinterpret_cast(&in_sparse[i]), (mxnet::ext::MXDType)intypes[i], - inshapes[i], indims[i], inIDs[i], - mxnet::ext::MXContext(indev_type[i], indev_id[i]), type); - } - } - - // create a vector of tensors for outputs - std::vector outputs(num_out); - // create a vector for sparse outputs - std::vector out_sparse(num_out); - - for (int i = 0; i < num_out; i++) { - if (outstypes[i] == 0) { - // Dense representation. - outputs[i].setTensor(outdata[i], (mxnet::ext::MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), - mxnet::ext::kDefaultStorage); - } else { - // Sparse representation. 
- mxnet::ext::MXStorageType type; - if (outstypes[i] == 1) { - type = mxnet::ext::kRowSparseStorage; - out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], - out_indices_shapes[i]); - } else { - type = mxnet::ext::kCSRStorage; - out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], - out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); - } - outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), - (mxnet::ext::MXDType)outtypes[i], - outshapes[i], outdims[i], outIDs[i], - mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), type); - } - } - - mxnet::ext::OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, - stream, sparse_malloc, sparse_alloc, rng_cpu_states, rng_gpu_states); - - mxnet::ext::CustomStatefulOp* op_ptr = - reinterpret_cast(state_op); - if (is_forward) { - return op_ptr->Forward(&inputs, &outputs, res); - } - return op_ptr->Backward(&inputs, &outputs, res); - } + void* rng_cpu_states, void* rng_gpu_states); /*! \brief returns number of partitioners registered in this library */ - MX_INT_RET _partRegSize() { - return mxnet::ext::Registry::get()->size(); - } + MX_INT_RET _partRegSize(); /* returns number of strategies registered for partitioner * at specified index */ - MX_INT_RET _partRegGetCount(int idx, const char** name) { - mxnet::ext::CustomPartitioner part = - mxnet::ext::Registry::get()->get(idx); - *name = part.name; - return part.strategies.size(); - } + MX_INT_RET _partRegGetCount(int idx, const char** name); /*! 
\brief returns partitioner registration at specified index */ MX_VOID_RET _partRegGet(int part_idx, int stg_idx, const char** strategy, mxnet::ext::supportedOps_t* supportedOps, mxnet::ext::createSelector_t* createSelector, - mxnet::ext::reviewSubgraph_t* reviewSubgraph, const char** op_name) { - mxnet::ext::CustomPartitioner part = - mxnet::ext::Registry::get()->get(part_idx); - *strategy = part.strategies[stg_idx]; - *op_name = part.op_names[stg_idx]; - *supportedOps = part.getSupportedOps(stg_idx); - *createSelector = part.getCreateSelector(stg_idx); - *reviewSubgraph = part.getReviewSubgraph(stg_idx); - } + mxnet::ext::reviewSubgraph_t* reviewSubgraph, const char** op_name); /*! \brief returns status of calling supported ops function from library */ MX_INT_RET _partCallSupportedOps(mxnet::ext::supportedOps_t supportedOps, const char *json, int num_ids, int *ids, const char* const* opt_keys, - const char* const* opt_vals, int num_opts) { - mxnet::ext::Graph *graph = mxnet::ext::Graph::fromString(json); - // create map of options from list - std::unordered_map opts; - for (int i = 0; i < num_opts; i++) - opts[std::string(opt_keys[i])] = std::string(opt_vals[i]); - - // create array of subgraph IDs for operator support - std::vector _ids(num_ids, -2); - // call user's supportedOps function - mxnet::ext::MXReturnValue retval = supportedOps(graph, &_ids, opts); - if (!retval) return retval; - - // copy bools in ids to ints - for (int i = 0; i < num_ids; i++) - ids[i] = _ids[i]; - - return retval; - } + const char* const* opt_vals, int num_opts); /*! 
\brief returns status of calling create selector function from library */ MX_INT_RET _partCallCreateSelector(mxnet::ext::createSelector_t createSelector, const char *json, void** selector, const char* const* opt_keys, - const char* const* opt_vals, int num_opts) { - mxnet::ext::Graph *graph = mxnet::ext::Graph::fromString(json); - // create map of options from list - std::unordered_map opts; - for (int i = 0; i < num_opts; i++) - opts[std::string(opt_keys[i])] = std::string(opt_vals[i]); - - // void pointer to hold selector instance created in custom library - // eventually pointer is populated by instance from custom library - mxnet::ext::CustomOpSelector** sel_ptr = - reinterpret_cast(selector); - - // call user's createSelector function - return createSelector(graph, sel_ptr, opts); - } + const char* const* opt_vals, int num_opts); /*! \brief returns status of calling select function from library */ - MX_VOID_RET _partCallSelect(void* sel_inst, int nodeID, int* selected) { - mxnet::ext::CustomOpSelector* sel_ptr = - reinterpret_cast(sel_inst); - *selected = sel_ptr->Select(nodeID); - } + MX_VOID_RET _partCallSelect(void* sel_inst, int nodeID, int* selected); /*! \brief returns status of calling select input function from library */ MX_VOID_RET _partCallSelectInput(void* sel_inst, int nodeID, - int input_nodeID, int* selected) { - mxnet::ext::CustomOpSelector* sel_ptr = - reinterpret_cast(sel_inst); - *selected = sel_ptr->SelectInput(nodeID, input_nodeID); - } + int input_nodeID, int* selected); /*! \brief returns status of calling select output function from library */ MX_VOID_RET _partCallSelectOutput(void* sel_inst, int nodeID, - int output_nodeID, int* selected) { - mxnet::ext::CustomOpSelector* sel_ptr = - reinterpret_cast(sel_inst); - *selected = sel_ptr->SelectOutput(nodeID, output_nodeID); - } + int output_nodeID, int* selected); /*! 
\brief returns status of calling filter function from library */ MX_VOID_RET _partCallFilter(void* sel_inst, int* candidates, int num_candidates, - int** keep, int* num_keep) { - mxnet::ext::CustomOpSelector* sel_ptr = - reinterpret_cast(sel_inst); - std::vector candidates_(num_candidates); - for (int i=0; i < num_candidates; i++) { - candidates_[i] = candidates[i]; - } - std::vector keep_; - - sel_ptr->Filter(candidates_, &keep_); - - *num_keep = keep_.size(); - *keep = static_cast(malloc(keep_.size() * sizeof(int))); - for (unsigned i=0; i < keep_.size(); i++) - (*keep)[i] = keep_[i]; - } + int** keep, int* num_keep); /*! \brief returns status of calling reset selector function from library */ - MX_VOID_RET _partCallReset(void* sel_inst) { - mxnet::ext::CustomOpSelector* sel_ptr = - reinterpret_cast(sel_inst); - sel_ptr->Reset(); - } + MX_VOID_RET _partCallReset(void* sel_inst); /*! \brief returns status of calling review subgraph function from library */ MX_INT_RET _partCallReviewSubgraph(mxnet::ext::reviewSubgraph_t reviewSubgraph, const char *json, @@ -2287,79 +1266,14 @@ extern "C" { void* const* aux_data, const int64_t* const* aux_shapes, const int* aux_dims, const int* aux_types, const size_t* aux_IDs, const char* const* aux_dev_type, - const int* aux_dev_id) { - mxnet::ext::Graph *subgraph = mxnet::ext::Graph::fromString(json); - bool accept_bool = false; - // create map of attributes from list - std::unordered_map opts; - for (int i = 0; i < num_opts; i++) - opts[std::string(opt_keys[i])] = std::string(opt_vals[i]); - - // create a map of named tensors for args - std::unordered_map args; - for (int i = 0; i < num_args; i++) { - std::vector shapes; - for (int j = 0; j < arg_dims[i]; j++) - shapes.push_back(arg_shapes[i][j]); - - mxnet::ext::MXTensor tensor(arg_data[i], shapes, (mxnet::ext::MXDType)arg_types[i], - arg_IDs[i], mxnet::ext::MXContext(arg_dev_type[i], arg_dev_id[i])); - args[arg_names[i]] = tensor; - } - // create a map of named tensors for aux 
- std::unordered_map aux; - for (int i = 0; i < num_aux; i++) { - std::vector shapes; - for (int j = 0; j < aux_dims[i]; j++) - shapes.push_back(aux_shapes[i][j]); - - mxnet::ext::MXTensor tensor(aux_data[i], shapes, (mxnet::ext::MXDType)aux_types[i], - aux_IDs[i], mxnet::ext::MXContext(aux_dev_type[i], - aux_dev_id[i])); - aux[aux_names[i]] = tensor; - } - - subgraph->_setParams(&args, &aux); - mxnet::ext::MXReturnValue retval = reviewSubgraph(subgraph, subgraph_id, &accept_bool, - opts); - if (!retval) return retval; - - *accept = accept_bool; - - if (subgraph->attrs.size() > 0) { - *num_attrs = subgraph->attrs.size(); - // allocate space for attributes - *attr_keys = static_cast(malloc (*num_attrs * sizeof(char*))); - *attr_vals = static_cast(malloc (*num_attrs * sizeof(char*))); - - // copy attributes - int i = 0; - for (auto kv : subgraph->attrs) { - (*attr_keys)[i] = static_cast(malloc ((kv.first.size()+1) * sizeof(char))); - std::string val = kv.second.dump(); // convert JsonVal back to string - (*attr_vals)[i] = static_cast(malloc ((val.size()+1) * sizeof(char))); - snprintf((*attr_keys)[i], kv.first.size()+1, "%s", kv.first.c_str()); - snprintf((*attr_vals)[i], val.size()+1, "%s", val.c_str()); - i++; - } - } - - return retval; - } + const int* aux_dev_id); /*! \brief returns number of graph passes registered in this library */ - MX_INT_RET _passRegSize() { - return mxnet::ext::Registry::get()->size(); - } + MX_INT_RET _passRegSize(); /*! \brief returns pass registration at specified index */ MX_VOID_RET _passRegGet(int pass_idx, mxnet::ext::graphPass_t* graphPass, - const char** pass_name) { - mxnet::ext::CustomPass pass = - mxnet::ext::Registry::get()->get(pass_idx); - *graphPass = pass.pass; - *pass_name = pass.name; - } + const char** pass_name); /*! 
\brief returns status of calling graph pass function from library */ MX_INT_RET _passCallGraphPass(mxnet::ext::graphPass_t graphPass, const char *json, @@ -2374,49 +1288,7 @@ extern "C" { const int* aux_dims, const int* aux_types, const size_t* aux_IDs, const char* const* aux_dev_type, const int* aux_dev_id, mxnet::ext::nd_malloc_t nd_malloc, - const void* nd_alloc) { - mxnet::ext::Graph *graph = mxnet::ext::Graph::fromString(json); - // create map of attributes from list - std::unordered_map opts; - for (int i = 0; i < num_opts; i++) - opts[std::string(opt_keys[i])] = std::string(opt_vals[i]); - - // create a map of named tensors for args - std::unordered_map args; - for (int i = 0; i < num_args; i++) { - std::vector shapes; - for (int j = 0; j < arg_dims[i]; j++) - shapes.push_back(arg_shapes[i][j]); - - mxnet::ext::MXTensor tensor(arg_data[i], shapes, (mxnet::ext::MXDType)arg_types[i], - arg_IDs[i], mxnet::ext::MXContext(arg_dev_type[i], - arg_dev_id[i])); - args[arg_names[i]] = tensor; - } - // create a map of named tensors for aux - std::unordered_map aux; - for (int i = 0; i < num_aux; i++) { - std::vector shapes; - for (int j = 0; j < aux_dims[i]; j++) - shapes.push_back(aux_shapes[i][j]); - - mxnet::ext::MXTensor tensor(aux_data[i], shapes, (mxnet::ext::MXDType)aux_types[i], - aux_IDs[i], mxnet::ext::MXContext(aux_dev_type[i], - aux_dev_id[i])); - aux[aux_names[i]] = tensor; - } - - std::unordered_map new_args, new_aux; - mxnet::ext::PassResource res(&new_args, &new_aux, nd_malloc, nd_alloc); - graph->_setParams(&args, &aux); - graph->_setPassResource(&res); - mxnet::ext::MXReturnValue retval = graphPass(graph, opts); - if (!retval) return retval; - - std::string *tmp = new std::string(graph->toString()); - *out_graph = const_cast(tmp->c_str()); - return retval; - } + const void* nd_alloc); /*! * \brief Checks if the MXNet version is supported by the library. 
@@ -2432,13 +1304,10 @@ extern "C" { #endif initialize(int version); - MX_INT_RET _msgSize() { - return mxnet::ext::MXerrorMsgs::get()->size(); - } + MX_INT_RET _msgSize(); /*! \brief returns operator registration at specified index */ - MX_VOID_RET _msgGet(int idx, const char** msg) { - *msg = mxnet::ext::MXerrorMsgs::get()->get(idx)->c_str(); - } + MX_VOID_RET _msgGet(int idx, const char** msg); } // extern "C" + #endif // MXNET_LIB_API_H_ diff --git a/python/mxnet/library.py b/python/mxnet/library.py index 487fff940fda..22528a08dc01 100644 --- a/python/mxnet/library.py +++ b/python/mxnet/library.py @@ -24,6 +24,17 @@ from .ndarray.register import _make_ndarray_function from .symbol.register import _make_symbol_function +class MXlib: + """Holds a pointer to a loaded shared library and closes it on destruction""" + def __init__(self, handle): + self.handle = handle + def __del__(self): + libdl = ctypes.CDLL("libdl.so") + libdl.dlclose(self.handle) + +# set of libraries loaded +loaded_libs = [] + def load(path, verbose=True): """Loads library dynamically. @@ -39,6 +50,8 @@ def load(path, verbose=True): --------- void """ + global loaded_libs + #check if path exists if not os.path.exists(path): raise MXNetError("load path %s does NOT exist" % path) @@ -53,7 +66,10 @@ def load(path, verbose=True): verbose_val = 1 if verbose else 0 byt_obj = path.encode('utf-8') chararr = ctypes.c_char_p(byt_obj) - check_call(_LIB.MXLoadLib(chararr, mx_uint(verbose_val))) + lib_ptr = ctypes.c_void_p(0) + check_call(_LIB.MXLoadLib(chararr, mx_uint(verbose_val), ctypes.byref(lib_ptr))) + # add library pointer to list so it can be closed later + loaded_libs.append(MXlib(lib_ptr)) #regenerate operators _init_op_module('mxnet', 'ndarray', _make_ndarray_function) @@ -73,7 +89,6 @@ def load(path, verbose=True): func = getattr(mx_sym_op, op) setattr(mx_sym, op, func) - def compiled_with_gcc_cxx11_abi(): """Check if the library is compiled with _GLIBCXX_USE_CXX11_ABI.
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index dc4e168372ec..1def2fdf981b 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1462,15 +1462,15 @@ void registerPasses(void *lib, int verbose, mxnet::ext::msgSize_t msgSize, * \brief Loads dynamic custom library and initializes it * \param path library path */ -int MXLoadLib(const char *path, unsigned verbose) { +int MXLoadLib(const char *path, unsigned verbose, void** lib) { API_BEGIN(); - void *lib = LibraryInitializer::Get()->lib_load(path); - if (!lib) + *lib = LibraryInitializer::Get()->lib_load(path); + if (!*lib) LOG(FATAL) << "Unable to load library"; // check that library and MXNet use same version of library API mxnet::ext::opVersion_t opVersion = - get_func(lib, const_cast(MXLIB_OPVERSION_STR)); + get_func(*lib, const_cast(MXLIB_OPVERSION_STR)); int libVersion = opVersion(); if (MX_LIBRARY_VERSION != libVersion) LOG(FATAL) << "Library version (" << libVersion << ") does not match MXNet version (" @@ -1478,22 +1478,22 @@ int MXLoadLib(const char *path, unsigned verbose) { // get error messaging APIs mxnet::ext::msgSize_t msgSize = - get_func(lib, const_cast(MXLIB_MSGSIZE_STR)); + get_func(*lib, const_cast(MXLIB_MSGSIZE_STR)); mxnet::ext::msgGet_t msgGet = - get_func(lib, const_cast(MXLIB_MSGGET_STR)); + get_func(*lib, const_cast(MXLIB_MSGGET_STR)); // initialize library by passing MXNet version mxnet::ext::initialize_t initialize = - get_func(lib, const_cast(MXLIB_INITIALIZE_STR)); + get_func(*lib, const_cast(MXLIB_INITIALIZE_STR)); if (!initialize(static_cast(MXNET_VERSION))) { std::string msgs = getExtensionMsgs(msgSize, msgGet); LOG(FATAL) << "Library failed to initialize" << msgs; } // find ops, partitioners, and passes in library - registerOperators(lib, verbose, msgSize, msgGet); - registerPartitioners(lib, verbose, msgSize, msgGet); - registerPasses(lib, verbose, msgSize, msgGet); + registerOperators(*lib, verbose, msgSize, msgGet); + registerPartitioners(*lib, verbose, msgSize, 
msgGet); + registerPasses(*lib, verbose, msgSize, msgGet); API_END(); } diff --git a/src/initialize.cc b/src/initialize.cc index b207423a4fb5..9ef51219609f 100644 --- a/src/initialize.cc +++ b/src/initialize.cc @@ -97,9 +97,7 @@ LibraryInitializer::LibraryInitializer() install_pthread_atfork_handlers(); } -LibraryInitializer::~LibraryInitializer() { - close_open_libs(); -} +LibraryInitializer::~LibraryInitializer() = default; bool LibraryInitializer::lib_is_loaded(const std::string& path) const { return loaded_libs.count(path) > 0; @@ -125,7 +123,13 @@ void* LibraryInitializer::lib_load(const char* path) { return nullptr; } #else - handle = dlopen(path, RTLD_LAZY); + /* library loading flags: + * RTLD_LAZY - Perform lazy binding. Only resolve symbols as the code that + * references them is executed. + * RTLD_LOCAL - Symbols defined in this library are not made available to + * resolve references in subsequently loaded libraries. + */ + handle = dlopen(path, RTLD_LAZY | RTLD_LOCAL); if (!handle) { LOG(FATAL) << "Error loading library: '" << path << "'\n" << dlerror(); return nullptr; diff --git a/src/lib_api.cc b/src/lib_api.cc new file mode 100644 index 000000000000..4181d8b70213 --- /dev/null +++ b/src/lib_api.cc @@ -0,0 +1,1593 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file lib_api.cc + * \brief APIs to interact with libraries + * This API specifies function prototypes to + * register custom ops, partitioner, and passes + * for library authors + * See example/extensions/lib_custom_op/README.md + * See example/extensions/lib_subgraph/README.md + * See example/extensions/lib_pass/README.md + */ + +#include "mxnet/lib_api.h" + +mxnet::ext::MXerrorMsgs* mxnet::ext::MXerrorMsgs::get() { + static MXerrorMsgs inst; + return &inst; + } + +std::stringstream& mxnet::ext::MXerrorMsgs::add(const char* file, int line) { + messages.emplace_back(); + messages.back() << file << "[" << line << "]: "; + return messages.back(); +} + +int mxnet::ext::MXerrorMsgs::size() { + return messages.size(); +} + +const std::string* mxnet::ext::MXerrorMsgs::get(int idx) { + return new std::string(messages.at(idx).str()); +} + +mxnet::ext::MXContext::MXContext() : dev_type("error"), dev_id(-1) {} + +mxnet::ext::MXContext::MXContext(std::string dev_type_, int dev_id_) + : dev_type(std::move(dev_type_)), dev_id(dev_id_) {} + +mxnet::ext::MXContext::MXContext(const char* dev_type_, int dev_id_) + : dev_type(dev_type_), dev_id(dev_id_) {} + +mxnet::ext::MXContext mxnet::ext::MXContext::CPU() { return MXContext("cpu", 0); } + +mxnet::ext::MXContext mxnet::ext::MXContext::GPU() { return MXContext("gpu", 0); } + +mxnet::ext::MXContext mxnet::ext::MXContext::CPU(int dev_id) { return MXContext("cpu", dev_id); } + +mxnet::ext::MXContext mxnet::ext::MXContext::GPU(int dev_id) { return MXContext("gpu", dev_id); } + +void mxnet::ext::MXSparse::set(void *data_ptr, const int64_t* dims, int ndims, void *idx, + int64_t num_idx, void *idx_ptr, int64_t num_idx_ptr) { + data = data_ptr; + // If CSR, num of non-zero elements is num_idx, + // If row sparse, num of elements is num_idx * width.
+ data_len = num_idx; + if (!idx_ptr) { + for (int i = 1; i < ndims; ++i) + data_len *= dims[i]; + } + + indices = reinterpret_cast(idx); + indices_len = num_idx; + + if (idx_ptr) { + indptr = reinterpret_cast(idx_ptr); + indptr_len = num_idx_ptr; + } +} + +mxnet::ext::MXTensor::MXTensor() : data_ptr(nullptr), dtype(kUNSET), verID(0), + stype(kDefaultStorage) {} +mxnet::ext::MXTensor::MXTensor(const MXTensor& oth) : data_ptr(oth.data_ptr), shape(oth.shape), + dtype(oth.dtype), verID(oth.verID), + ctx(oth.ctx), stype(oth.stype) { + setDLTensor(); +} + +mxnet::ext::MXTensor::MXTensor(void *data_ptr, std::vector shape, MXDType dtype, + size_t vID, MXContext mx_ctx, MXStorageType stype) + : data_ptr(data_ptr), shape(std::move(shape)), dtype(dtype), verID(vID), ctx(std::move(mx_ctx)), + stype(stype) { + setDLTensor(); +} + +void mxnet::ext::MXTensor::setTensor(void *dptr, MXDType type, const int64_t* dims, int ndims, + size_t vID, MXContext mx_ctx, MXStorageType storage_type) { + data_ptr = dptr; dtype = type; verID = vID; ctx = mx_ctx; stype = storage_type; + shape.clear(); + for (int j = 0; j < ndims; j++) { + shape.push_back(dims[j]); + } + setDLTensor(); +} + +void mxnet::ext::MXTensor::setDLTensor() { + dltensor.data = data_ptr; + dltensor.ndim = shape.size(); + dltensor.shape = const_cast(shape.data()); + dltensor.strides = nullptr; + dltensor.byte_offset = 0; + dltensor.dtype.lanes = 1; + dltensor.ctx.device_id = ctx.dev_id; + if (ctx.dev_type == "cpu") + dltensor.ctx.device_type = kDLCPU; + else if (ctx.dev_type == "gpu") + dltensor.ctx.device_type = kDLGPU; + else if (ctx.dev_type == "opencl") + dltensor.ctx.device_type = kDLOpenCL; + else if (ctx.dev_type == "vulcan") + dltensor.ctx.device_type = kDLVulkan; + else if (ctx.dev_type == "metal") + dltensor.ctx.device_type = kDLMetal; + else if (ctx.dev_type == "vpi") + dltensor.ctx.device_type = kDLVPI; + else if (ctx.dev_type == "rocm") + dltensor.ctx.device_type = kDLROCM; + else + dltensor.ctx.device_type = 
kDLExtDev; + switch (dtype) { + case kFloat32: + dltensor.dtype.code = kDLFloat; + dltensor.dtype.bits = 32; + break; + case kFloat64: + dltensor.dtype.code = kDLFloat; + dltensor.dtype.bits = 64; + break; + case kFloat16: + dltensor.dtype.code = kDLFloat; + dltensor.dtype.bits = 16; + break; + case kUint8: + dltensor.dtype.code = kDLUInt; + dltensor.dtype.bits = 8; + break; + case kInt32: + dltensor.dtype.code = kDLInt; + dltensor.dtype.bits = 32; + break; + case kInt8: + dltensor.dtype.code = kDLInt; + dltensor.dtype.bits = 8; + break; + case kInt64: + dltensor.dtype.code = kDLInt; + dltensor.dtype.bits = 64; + break; + default: + dltensor.dtype.code = 0; + dltensor.dtype.bits = 0; + throw std::runtime_error("Error! Invalid dtype flag: " + + std::to_string(static_cast(dtype)) + + " when constructing MXTensor"); + } +} + +int64_t mxnet::ext::MXTensor::size() const { + int64_t size = 1; + for (auto &s : shape) + size *= s; + return size; +} + +bool mxnet::ext::MXTensor::isSame(const MXTensor &oth) const { + return data_ptr == oth.data_ptr && + dtype == oth.dtype && + verID == oth.verID && + ctx.dev_type == oth.ctx.dev_type && + ctx.dev_id == oth.ctx.dev_id && + shape == oth.shape && + stype == oth.stype; +} + +mxnet::ext::PassResource::PassResource(std::unordered_map* new_args, + std::unordered_map* new_aux, + nd_malloc_t nd_malloc, const void* nd_alloc) + : new_args_(new_args), new_aux_(new_aux), nd_malloc_(nd_malloc), nd_alloc_(nd_alloc) {} + +mxnet::ext::MXTensor* mxnet::ext::PassResource::alloc_arg(const std::string& name, + const std::vector& shapes, + const mxnet::ext::MXContext &ctx, + mxnet::ext::MXDType dtype) const { + void* data; + nd_malloc_(nd_alloc_, shapes.data(), shapes.size(), ctx.dev_type.c_str(), ctx.dev_id, + dtype, name.c_str(), 1, &data); + MXTensor tensor(data, shapes, dtype, 0, ctx, kDefaultStorage); + (*new_args_)[name] = tensor; + return &(new_args_->at(name)); +} + +mxnet::ext::MXTensor* mxnet::ext::PassResource::alloc_aux(const 
std::string& name, + const std::vector& shapes, + const mxnet::ext::MXContext &ctx, + mxnet::ext::MXDType dtype) const { + void* data; + nd_malloc_(nd_alloc_, shapes.data(), shapes.size(), ctx.dev_type.c_str(), ctx.dev_id, + dtype, name.c_str(), 0, &data); + MXTensor tensor(data, shapes, dtype, 0, ctx, kDefaultStorage); + (*new_aux_)[name] = tensor; + return &(new_aux_->at(name)); +} + +mxnet::ext::OpResource::OpResource(xpu_malloc_t cpu_malloc_fp, void* cpu_alloc_fp, + xpu_malloc_t gpu_malloc_fp, void* gpu_alloc_fp, void* stream, + sparse_malloc_t sparse_malloc_fp, void* sparse_alloc_fp, + void* rng_cpu_states, void* rng_gpu_states) + : cpu_malloc(cpu_malloc_fp), gpu_malloc(gpu_malloc_fp), + cpu_alloc(cpu_alloc_fp), gpu_alloc(gpu_alloc_fp), cuda_stream(stream), + sparse_malloc(sparse_malloc_fp), sparse_alloc(sparse_alloc_fp), + rand_cpu_states(rng_cpu_states), rand_gpu_states(rng_gpu_states) {} + +void* mxnet::ext::OpResource::alloc_cpu(int size) const { + return cpu_malloc(cpu_alloc, size); +} + +void* mxnet::ext::OpResource::alloc_gpu(int size) const { + return gpu_malloc(gpu_alloc, size); +} + +void mxnet::ext::OpResource::alloc_sparse(mxnet::ext::MXSparse* sparse, int index, + int indices_len, int indptr_len) const { + sparse_malloc(sparse_alloc, index, indices_len, indptr_len, + &(sparse->data), &(sparse->indices), &(sparse->indptr)); +} + +mxnet::ext::mx_cpu_rand_t* mxnet::ext::OpResource::get_cpu_rand_states() const { + return static_cast(rand_cpu_states); +} + +std::string mxnet::ext::getShapeAt(const std::string& shape, unsigned index) { + int idx = 1; // start at 1 to skip the first square bracket [ + // find the beginning of the output shape for the particular output index + for (unsigned x=0; x < index; x++) + idx = shape.find("[", idx+1); + int stop = shape.find("]", idx); // find stop index for this output shape + // add this shape to the list + return shape.substr(idx, stop-idx+1); +} + +std::string mxnet::ext::getDtypeAt(const std::string& dtype, 
unsigned index) { + // find the beginning of the output dtype for the particular output index + int idx = 0; + for (unsigned x=0; x < index; x++) + idx = dtype.find(",", idx+1); + int stop = dtype.find(",", idx+1); // find stop index for this output dtype + if (stop == -1) stop = dtype.find("]", idx+1); + return dtype.substr(idx+1, stop-idx-1); +} + +mxnet::ext::JsonVal::JsonVal() : type(ERR), num(-1), str("") {} +mxnet::ext::JsonVal::JsonVal(mxnet::ext::JsonType t) : type(t), num(-1), str("") {} +mxnet::ext::JsonVal::JsonVal(std::string s) : type(STR), num(-1), str(std::move(s)) {} +mxnet::ext::JsonVal::JsonVal(int n) : type(NUM), num(n), str(std::to_string(n)) {} +mxnet::ext::JsonVal::JsonVal(JsonType t, int n, std::string s) : type(t), num(n), + str(std::move(s)) {} + +bool mxnet::ext::JsonVal::operator<(const mxnet::ext::JsonVal &o) const { + // for string JSON objects compare the string + if (type == STR) return type == o.type && str < o.str; + // for number JSON objects compare the number + if (type == NUM) return type == o.type && num < o.num; + // for list JSON objects, compare the size of list, and then each object in the list + if (type == LIST) { + if (list.size() != o.list.size()) return false; + for (unsigned int i=0; i< list.size(); i++) + if (list[i] < o.list[i]) + return false; // if we find an object that doesnt match return + return true; // all objects in lists matched + } + // for map JSON objects, compare the size of map, and then each key/value in the maps + if (type == MAP) { + if (map.size() != o.map.size()) return false; + for (auto &item : map) { + // if one map is missing a key in another return + if (o.map.find(item.first) == o.map.end()) return false; + if (item.second < o.map.at(item.first)) return false; + } + return true; + } + return type < o.type; +} + +std::string mxnet::ext::JsonVal::dump() const { + std::string ret; + switch (type) { + case ERR: + ret = "json(Error)"; + break; + case STR: + ret = "\"" + str + "\""; + break; + 
case NUM: + ret = str; + break; + case LIST: + ret = "["; + for (unsigned i=0; i < list.size(); i++) { + auto &item = list[i]; + ret += item.dump(); + if (i < list.size()-1) + ret += ","; + } + ret += "]"; + break; + case MAP: + ret = "{"; + unsigned cnt = 0; + for (auto &item : map) { + ret += item.first.dump() + " : " + item.second.dump(); + if (cnt++ < map.size()-1) + ret += ","; + } + ret += "}"; + break; + } + return ret; +} + +mxnet::ext::JsonVal mxnet::ext::JsonVal::parse(const std::string& json) { + unsigned int idx = 0; + return JsonVal::parse(json, &idx); +} + +mxnet::ext::JsonVal mxnet::ext::JsonVal::parse_string(const std::string& json, unsigned int* idx) { + JsonVal ret(STR); + while (*idx < json.size()) { + if (json[*idx] == '"') { + ++(*idx); + return ret; + } else { + ret.str += json[*idx]; + ++(*idx); + } + } + MX_ERROR_MSG << "Error! Unable to parse string: '" << json.substr(*idx) << "'" << std::endl; + return JsonVal(); +} + +mxnet::ext::JsonVal mxnet::ext::JsonVal::parse_num(const std::string& json, unsigned int* idx) { + JsonVal ret(NUM); + while (*idx < json.size()) { + if (json[*idx] >= '0' && json[*idx] <= '9') { + ret.str += json[*idx]; + ++(*idx); + } else { + break; + } + } + ret.num = std::stoi(ret.str); + return ret; +} + +mxnet::ext::JsonVal mxnet::ext::JsonVal::parse_list(const std::string& json, unsigned int* idx) { + JsonVal ret(LIST); + while (*idx < json.size()) { + if (json[*idx] == ']') { + ++(*idx); + return ret; + } else { + JsonVal item = JsonVal::parse(json, idx); + if (item.type != ERR) + ret.list.push_back(item); + } + } + MX_ERROR_MSG << "Error! 
Unable to parse list: '" << json.substr(*idx) << "'" << std::endl; + return JsonVal(); +} + +mxnet::ext::JsonVal mxnet::ext::JsonVal::parse_map(const std::string& json, unsigned int* idx) { + JsonVal ret(MAP), key; + while (*idx < json.size()) { + if (json[*idx] == '}') { + ++(*idx); + return ret; + } else { + JsonVal item = JsonVal::parse(json, idx); + if (key.type == ERR) { + key = item; + } else { + ret.map[key] = item; + key.type = ERR; + } + } + } + MX_ERROR_MSG << "Error! Unable to parse map: '" << json.substr(*idx) << "'" << std::endl; + return mxnet::ext::JsonVal(); +} + +mxnet::ext::JsonVal mxnet::ext::JsonVal::parse(const std::string& json, unsigned int *idx) { + JsonVal ret; + while (*idx < json.size()) { + if (json[*idx] == '"') { + ++(*idx); + ret = JsonVal::parse_string(json, idx); + } else if (json[*idx] >= '0' && json[*idx] <= '9') { + ret = JsonVal::parse_num(json, idx); + } else if (json[*idx] == '[') { + ++(*idx); + ret = JsonVal::parse_list(json, idx); + } else if (json[*idx] == '{') { + ++(*idx); + ret = JsonVal::parse_map(json, idx); + } else if (json[*idx] == ']' || json[*idx] == '}') {return ret;} + if (ret.type != ERR) return ret; + ++(*idx); + } + return ret; +} + +std::string mxnet::ext::JsonVal::toString() const { + std::string ret; + switch (type) { + case ERR: + ret = "json(Error)"; + break; + case STR: + ret = "json(STR:" + str + ")"; + break; + case NUM: + ret = "json(INT:" + str + ")"; + break; + case LIST: + ret = "json(LIST:["; + for (auto &item : list) + ret += item.toString() + ","; + ret += "])"; + break; + case MAP: + ret = "json(MAP:{"; + for (auto &item : map) + ret += item.first.toString() + " : " + item.second.toString() + ","; + ret += "})"; + break; + } + return ret; +} + +mxnet::ext::Node::Node() {tensor = nullptr;} + +void mxnet::ext::Node::_setPassResource(mxnet::ext::PassResource* res_) {res = res_;} + +void mxnet::ext::Node::alloc_arg(const std::vector& shapes, + const mxnet::ext::MXContext &ctx, mxnet::ext::MXDType 
dtype) { + if (!res) + throw std::runtime_error("Node not initialized. Cannot use alloc_arg outside of graph passes."); + tensor = res->alloc_arg(name, shapes, ctx, dtype); +} + +void mxnet::ext::Node::alloc_aux(const std::vector& shapes, + const mxnet::ext::MXContext &ctx, mxnet::ext::MXDType dtype) { + if (!res) + throw std::runtime_error("Node not initialized. Cannot use alloc_aux outside of graph passes."); + tensor = res->alloc_aux(name, shapes, ctx, dtype); +} + +mxnet::ext::Graph::Graph() : res(nullptr) {} + +mxnet::ext::Graph::~Graph() { + for (auto &node : nodes) + delete node; +} + +mxnet::ext::Graph* mxnet::ext::Graph::fromString(const std::string& json) { + JsonVal val = JsonVal::parse(json); + return fromJson(val); +} + +mxnet::ext::Graph* mxnet::ext::Graph::fromJson(mxnet::ext::JsonVal val) { + // get nodes list + JsonVal nodes = val.map[JsonVal("nodes")]; + Graph *g = new Graph(); + + std::map nodeMap; + // loop over nodes + for (int i = 0; i < nodes.list.size(); i++) { + Node* n = new Node(); + g->nodes.push_back(n); + JsonVal node = nodes.list[i]; + + // set the op info + n->op = node.map[JsonVal("op")].str; + n->name = node.map[JsonVal("name")].str; + + // if op is null it is an input to the graph + if (n->op.compare("null") == 0) + g->inputs.push_back(n); + + // set attrs + JsonVal attributes = node.map[JsonVal("attrs")]; + for (auto& kv : attributes.map) { + n->attrs[kv.first.str] = kv.second.str; + } + + // set subgraphs, parsing each into a graph + if (node.map.count(JsonVal("subgraphs")) > 0) { + JsonVal subgraphs = node.map[JsonVal("subgraphs")]; + for (auto &subgraph : subgraphs.list) { + n->subgraphs.push_back(fromJson(subgraph)); + } + } + + // set node inputs + JsonVal node_inputs = node.map[JsonVal("inputs")]; + n->inputs.resize(node_inputs.list.size()); + for (int j = 0; j < node_inputs.list.size(); j++) { + JsonVal input = node_inputs.list[j]; + NodeEntry& entry = n->inputs[j]; + // get pointer to other node + entry.node = 
nodeMap[input.list[0].num]; + // get the other node's output index + entry.entry = input.list[1].num; + // set other nodes output as connected to this node + entry.node->outputs.push_back({n, j}); + } + nodeMap[i] = n; + } + + // set graph level outputs + JsonVal& heads = val.map[JsonVal("heads")]; + g->outputs.resize(heads.list.size()); + for (int i = 0; i < heads.list.size(); i++) { + JsonVal head = heads.list[i]; + g->outputs[i].node = nodeMap[head.list[0].num]; + g->outputs[i].entry = head.list[1].num; + } + + // add all attributes to the graph + for (auto& kv : val.map) { + if (kv.first.str.compare("nodes") != 0 && + kv.first.str.compare("heads") != 0 && + kv.first.str.compare("node_row_ptr") != 0 && + kv.first.str.compare("arg_nodes") != 0) { + g->attrs[kv.first.str] = kv.second; + } + } + return g; +} + +/* \brief convert graph object back to JSON object */ +mxnet::ext::JsonVal mxnet::ext::Graph::toJson() { + // top level object is a map + JsonVal val(MAP); + + // add attributes + for (auto& kv : attrs) { + val.map[JsonVal(kv.first)] = kv.second; + } + + // sort graph nodes in topological order, create mapping of node to index + std::map nodeMap; + std::vector sorted = topological_sort(); + // nodes are in reverse topological order in the vector (back is first) + // so loop from end to front over the vector 'sorted' + for (int i = sorted.size()-1; i >= 0; i--) { + nodeMap[sorted[i]] = sorted.size()-1-i; + } + + // create node_row_ptr entry + val.map[JsonVal("node_row_ptr")] = JsonVal(LIST); + JsonVal& node_row_ptr = val.map[JsonVal("node_row_ptr")]; + for (int i = 0; i < nodes.size(); i++) + node_row_ptr.list.emplace_back(i); + + // add all input nodes + val.map[JsonVal("arg_nodes")] = JsonVal(LIST); + JsonVal& arg_nodes = val.map[JsonVal("arg_nodes")]; + for (auto &input : inputs) + arg_nodes.list.emplace_back(nodeMap[input]); + + // add all output nodes + val.map[JsonVal("heads")] = JsonVal(LIST); + JsonVal& heads = val.map[JsonVal("heads")]; + for (int i 
= 0; i < outputs.size(); i++) { + heads.list.emplace_back(LIST); + JsonVal& out = heads.list[i]; + out.list.emplace_back(nodeMap[outputs[i].node]); + out.list.emplace_back(outputs[i].entry); + out.list.emplace_back(0); + } + + // add all graph nodes + val.map[JsonVal("nodes")] = JsonVal(LIST); + JsonVal& nodes_ = val.map[JsonVal("nodes")]; + for (int i = sorted.size()-1; i >= 0; i--) { + // each node is a map + nodes_.list.emplace_back(MAP); + Node* n = sorted[i]; + JsonVal& n_ = nodes_.list[nodes_.list.size()-1]; + + n_.map[JsonVal("op")] = JsonVal(n->op); + n_.map[JsonVal("name")] = JsonVal(n->name); + n_.map[JsonVal("inputs")] = JsonVal(LIST); + + // add inputs for this node + JsonVal& inputs_ = n_.map[JsonVal("inputs")]; + for (int j = 0; j < n->inputs.size(); j++) { + inputs_.list.emplace_back(LIST); + NodeEntry& entry = n->inputs[j]; + JsonVal& in = inputs_.list[j]; + in.list.emplace_back(nodeMap[entry.node]); + in.list.emplace_back(entry.entry); + in.list.emplace_back(0); + } + + // add subgraphs for this node, convert each back to JSON + if (n->subgraphs.size() > 0) { + n_.map[JsonVal("subgraphs")] = JsonVal(LIST); + JsonVal &subgraphs_ = n_.map[JsonVal("subgraphs")]; + for (Graph *subgraph : n->subgraphs) { + subgraphs_.list.push_back(subgraph->toJson()); + } + } + + // add attributes for this node + n_.map[JsonVal("attrs")] = JsonVal(MAP); + JsonVal& attrs_ = n_.map[JsonVal("attrs")]; + for (auto& kv : n->attrs) { + attrs_.map[JsonVal(kv.first)] = JsonVal(kv.second); + } + } + return val; +} + +/* \brief convert graph object to JSON string */ +std::string mxnet::ext::Graph::toString() { + return toJson().dump(); +} + + /* \brief visits a node "n" */ +void mxnet::ext::Graph::_dfs_util(Node* n, std::unordered_set* to_visit, + std::function handler) const { + to_visit->erase(n); // remove node now that we're visiting it + for (NodeEntry& e : n->outputs) { + Node* o = e.node; + if (to_visit->count(o) != 0) { + _dfs_util(o, to_visit, handler); // visit 
neighbor + } + } + handler(n); // post-order visit this node +} + +/* \brief post-order DFS graph traversal */ +void mxnet::ext::Graph::DFS(std::function handler) const { + std::unordered_set to_visit; + // put all nodes in set to visit + for (auto& n : nodes) + to_visit.insert(n); + // visit all inputs first + for (auto& i : inputs) + if (to_visit.count(i) != 0) + _dfs_util(i, &to_visit, handler); + // visit any nodes left + while (to_visit.size() > 0) + _dfs_util(*(to_visit.begin()), &to_visit, handler); +} + +/* \brief sort graph nodes in topological order */ +std::vector mxnet::ext::Graph::topological_sort() const { + std::vector sorted; + auto handler = [&](mxnet::ext::Node* n) { + sorted.push_back(n); // when visiting each node, add it in order to the vector + }; + DFS(handler); + return sorted; +} + +/* \brief print out graph details */ +void mxnet::ext::Graph::print(int indent) const { + std::string space = ""; + for (int i = 0; i < indent; i++) space+=" "; + + std::cout << space << "########### Graph #############" << std::endl; + std::cout << space << "attributes: " << std::endl; + for (auto &kv : attrs) + std::cout << space << "\t" << kv.first << " : " << kv.second.str << std::endl; + std::cout << space << "inputs: " << inputs.size() << std::endl; + std::cout << space << "outputs: " << outputs.size() << std::endl; + std::cout << space << "nodes: " << nodes.size() << std::endl; + std::vector sorted = topological_sort(); + // loop over each node and print out its inputs/outputs + for (int i = sorted.size()-1; i >= 0; i--) { + std::cout << space << "Node: " << sorted[i]->name << std::endl; + for (auto &input : sorted[i]->inputs) { + std::cout << space << "\tInput: " << input.node->name << " " + << input.entry << std::endl; + } + for (auto &output : sorted[i]->outputs) { + std::cout << space << "\tOutput: " << output.node->name << " " + << output.entry << std::endl; + } + if (sorted[i]->subgraphs.size() > 0) { + for (auto &subgraph : sorted[i]->subgraphs) { 
+ std::cout << space << "\tSubgraph:" << std::endl; + subgraph->print(indent+2); + } + } + } + std::cout << space << "###############################" << std::endl; +} + +/* \brief add a new node to this graph */ +mxnet::ext::Node* mxnet::ext::Graph::addNode(const std::string& name, const std::string& op) { + Node* n = new Node(); + n->name = name; + n->op = op; + if (res) + n->_setPassResource(res); + return n; +} + +/* \brief get node at index in graph */ +mxnet::ext::Node* mxnet::ext::Graph::getNode(size_t idx) { + return nodes[idx]; +} + +/* \brief get const node at index in const graph */ +const mxnet::ext::Node* mxnet::ext::Graph::getNode(size_t idx) const { + return nodes.at(idx); +} + +/* \brief get attribute on graph */ +const mxnet::ext::JsonVal& mxnet::ext::Graph::getAttr(const std::string& key) const { + return attrs.at(key); +} + +/* \brief get number of nodes in the graph */ +size_t mxnet::ext::Graph::size() const { + return nodes.size(); +} + +// internally set passResource to enable tensor allocation for graph passes +void mxnet::ext::Graph::_setPassResource(PassResource* res_) { + res = res_; + // set passResource for each node + for (Node* node : nodes) { + node->_setPassResource(res); + } +} + +// internally set arg/aux params when available +void mxnet::ext::Graph::_setParams(std::unordered_map* args, + std::unordered_map* aux) { + // set params for each input node + for (Node* node : inputs) { + if (args->count(node->name) > 0) + node->tensor = &args->at(node->name); + else if (aux->count(node->name) > 0) + node->tensor = &aux->at(node->name); + } +} + +mxnet::ext::CustomOp::CustomOp(const char* op_name) + : name(op_name), parse_attrs(nullptr), infer_type(nullptr), infer_storage_type(nullptr), + infer_shape(nullptr), mutate_inputs(nullptr), isSGop(false) {} + +mxnet::ext::CustomOp& mxnet::ext::CustomOp::setForward(mxnet::ext::fcomp_t fcomp, const char* ctx) { + if (forward_ctx_map.count(ctx) > 0) + raiseDuplicateContextError(); + 
forward_ctx_map[ctx] = fcomp; + return *this; +} + +mxnet::ext::CustomOp& mxnet::ext::CustomOp::setBackward(mxnet::ext::fcomp_t fgrad, + const char* ctx) { + if (backward_ctx_map.count(ctx) > 0) + raiseDuplicateContextError(); + backward_ctx_map[ctx] = fgrad; + return *this; +} + +mxnet::ext::CustomOp& mxnet::ext::CustomOp::setParseAttrs(mxnet::ext::parseAttrs_t func) { + parse_attrs = func; + return *this; +} + +mxnet::ext::CustomOp& mxnet::ext::CustomOp::setInferType(mxnet::ext::inferType_t func) { + infer_type = func; + return *this; +} + +mxnet::ext::CustomOp& mxnet::ext::CustomOp::setInferSType(mxnet::ext::inferSType_t func) { + infer_storage_type = func; + return *this; +} + +mxnet::ext::CustomOp& mxnet::ext::CustomOp::setInferShape(mxnet::ext::inferShape_t func) { + infer_shape = func; + return *this; +} + +mxnet::ext::CustomOp& mxnet::ext::CustomOp::setMutateInputs(mxnet::ext::mutateInputs_t func) { + mutate_inputs = func; + return *this; +} + +mxnet::ext::CustomOp& mxnet::ext::CustomOp::setCreateOpState(mxnet::ext::createOpState_t func, + const char* ctx) { + if (create_op_ctx_map.count(ctx) > 0) + raiseDuplicateContextError(); + create_op_ctx_map[ctx] = func; + return *this; +} + +mxnet::ext::CustomOp& mxnet::ext::CustomOp::setIsSubgraphOp() { + isSGop = true; + return *this; +} + +void mxnet::ext::CustomOp::mapToVector() { + for (auto kv : forward_ctx_map) { + forward_ctx_cstr.push_back(kv.first); + forward_fp.push_back(kv.second); + } + for (auto kv : backward_ctx_map) { + backward_ctx_cstr.push_back(kv.first); + backward_fp.push_back(kv.second); + } + for (auto kv : create_op_ctx_map) { + create_op_ctx_cstr.push_back(kv.first); + create_op_fp.push_back(kv.second); + } +} + +void mxnet::ext::CustomOp::raiseDuplicateContextError() { + std::string op_name_str(name); + throw std::runtime_error( + "Error! Error! 
Cannot register multiple functions under same context for operator '" + + op_name_str + "'"); +} + +mxnet::ext::CustomPass::CustomPass() : name("ERROR") {} +mxnet::ext::CustomPass::CustomPass(const char* pass_name) + : name(pass_name) {} +mxnet::ext::CustomPass& mxnet::ext::CustomPass::setBody(graphPass_t fn) { + pass = fn; + return *this; +} + +mxnet::ext::CustomPartitioner::CustomPartitioner() : name("ERROR") {} +mxnet::ext::CustomPartitioner::CustomPartitioner(const char* backend_name) : + name(backend_name) {} + +mxnet::ext::CustomPartitioner& mxnet::ext::CustomPartitioner::addStrategy(const char* prop_name, + const char* sg_name) { + strategies.push_back(prop_name); + op_names.push_back(sg_name); + return *this; +} + +mxnet::ext::CustomPartitioner& mxnet::ext::CustomPartitioner::setSupportedOps(const char* prop_name, + mxnet::ext::supportedOps_t fn) { + supported_map[std::string(prop_name)] = fn; + return *this; +} + +mxnet::ext::CustomPartitioner& mxnet::ext::CustomPartitioner::setCreateSelector( + const char* prop_name, mxnet::ext::createSelector_t fn) { + selector_map[std::string(prop_name)] = fn; + return *this; +} + +mxnet::ext::CustomPartitioner& mxnet::ext::CustomPartitioner::setReviewSubgraph( + const char* prop_name, mxnet::ext::reviewSubgraph_t fn) { + review_map[std::string(prop_name)] = fn; + return *this; +} + +mxnet::ext::supportedOps_t mxnet::ext::CustomPartitioner::getSupportedOps(int stg_id) { + std::string prop(strategies[stg_id]); + if (supported_map.count(prop) > 0) + return supported_map[prop]; + else + return nullptr; +} + +mxnet::ext::createSelector_t mxnet::ext::CustomPartitioner::getCreateSelector(int stg_id) { + std::string prop(strategies[stg_id]); + if (selector_map.count(prop) > 0) + return selector_map[prop]; + else + return nullptr; +} + +mxnet::ext::reviewSubgraph_t mxnet::ext::CustomPartitioner::getReviewSubgraph(int stg_id) { + std::string prop(strategies[stg_id]); + if (review_map.count(prop) > 0) + return review_map[prop]; + 
else + return nullptr; +} + +/*! \brief returns MXNet library version */ +MX_INT_RET _opVersion() { + return MX_LIBRARY_VERSION; +} + +/*! \brief returns number of ops registered in this library */ +MX_INT_RET _opRegSize() { + return mxnet::ext::Registry::get()->size(); +} + +/*! \brief returns operator registration at specified index */ +MX_VOID_RET _opRegGet(int idx, const char** name, int *isSGop, + const char*** forward_ctx, mxnet::ext::fcomp_t** forward_fp, + int* forward_count, const char*** backward_ctx, + mxnet::ext::fcomp_t** backward_fp, int* backward_count, + const char*** create_op_ctx, mxnet::ext::createOpState_t** create_op_fp, + int* create_op_count, mxnet::ext::parseAttrs_t* parse, + mxnet::ext::inferType_t* type, mxnet::ext::inferSType_t* stype, + mxnet::ext::inferShape_t* shape, mxnet::ext::mutateInputs_t* mutate) { + mxnet::ext::CustomOp &op = mxnet::ext::Registry::get()->get(idx); + *name = op.name; + *parse = op.parse_attrs; + *type = op.infer_type; + *stype = op.infer_storage_type; + *shape = op.infer_shape; + *mutate = op.mutate_inputs; + *isSGop = op.isSGop; + op.mapToVector(); + *forward_ctx = op.forward_ctx_cstr.data(); + *forward_fp = op.forward_fp.data(); + *forward_count = op.forward_fp.size(); + *backward_ctx = op.backward_ctx_cstr.data(); + *backward_fp = op.backward_fp.data(); + *backward_count = op.backward_fp.size(); + *create_op_ctx = op.create_op_ctx_cstr.data(); + *create_op_fp = op.create_op_fp.data(); + *create_op_count = op.create_op_fp.size(); +} + +/*! \brief calls free from the external library for library allocated arrays */ +MX_VOID_RET _opCallFree(void* ptr) { + free(ptr); +} + +/*! 
\brief returns status of calling parse attributes function for operator from library */ +MX_INT_RET _opCallParseAttrs(mxnet::ext::parseAttrs_t parseAttrs, const char* const* keys, + const char* const* vals, int num, + int* num_in, int* num_out) { + // create map of attributes from list + std::unordered_map attrs; + for (int i = 0; i < num; i++) { + attrs[std::string(keys[i])] = std::string(vals[i]); + } + return parseAttrs(attrs, num_in, num_out); +} + +/*! \brief returns status of calling inferShape function for operator from library */ +MX_INT_RET _opCallInferShape(mxnet::ext::inferShape_t inferShape, const char* const* keys, + const char* const* vals, int num, + unsigned int** inshapes, int* indims, int num_in, + unsigned int*** mod_inshapes, int** mod_indims, + unsigned int*** outshapes, int** outdims, int num_out) { + // create map of attributes from list + std::unordered_map attrs; + for (int i = 0; i < num; i++) { + attrs[std::string(keys[i])] = std::string(vals[i]); + } + + // create a vector of shapes for inputs + std::vector > in_shapes(num_in); + for (int i = 0; i < num_in; i++) { + for (int j = 0; j < indims[i]; j++) { + in_shapes[i].push_back(inshapes[i][j]); + } + } + + // create a vector of shapes for outputs + std::vector > out_shapes(num_out); + + int retval = inferShape(attrs, &in_shapes, &out_shapes); + if (!retval) return retval; + + // allocate space for modified input dims, shape + *mod_indims = static_cast(malloc (num_in * sizeof(int))); + *mod_inshapes = static_cast(malloc (num_in * sizeof(unsigned*))); + + // copy modified input shapes + for (int i = 0; i < num_in; i++) { + (*mod_indims)[i] = in_shapes[i].size(); + (*mod_inshapes)[i] = static_cast(malloc ((*mod_indims)[i] * sizeof(unsigned))); + for (int j = 0; j < (*mod_indims)[i]; j++) { + (*mod_inshapes)[i][j] = in_shapes[i][j]; + } + } + + // allocate space for output dims, shape + *outdims = static_cast(malloc (num_out * sizeof(int))); + *outshapes = static_cast(malloc (num_out * 
sizeof(unsigned*))); + + // copy output shapes + for (int i = 0; i < num_out; i++) { + (*outdims)[i] = out_shapes[i].size(); + (*outshapes)[i] = static_cast(malloc ((*outdims)[i] * sizeof(unsigned))); + for (int j = 0; j < (*outdims)[i]; j++) { + (*outshapes)[i][j] = out_shapes[i][j]; + } + } + return retval; +} + +/*! \brief returns status of calling inferType function for operator from library */ +MX_INT_RET _opCallInferType(mxnet::ext::inferType_t inferType, const char* const* keys, + const char* const* vals, int num, + int* intypes, int num_in, int* outtypes, int num_out) { + // create map of attributes from list + std::unordered_map attrs; + for (int i = 0; i < num; i++) { + attrs[std::string(keys[i])] = std::string(vals[i]); + } + + // create a vector of types for inputs + std::vector in_types(num_in); + for (int i = 0; i < num_in; i++) { + in_types[i] = intypes[i]; + } + + // create a vector of types for outputs + std::vector out_types(num_out, -1); + + int retval = inferType(attrs, &in_types, &out_types); + if (!retval) + return retval; + + // copy modified input types + for (int i = 0; i < num_in; i++) { + intypes[i] = in_types[i]; + } + // copy output types + for (int i = 0; i < num_out; i++) { + outtypes[i] = out_types[i]; + } + + return retval; +} + +/*! 
\brief returns status of calling inferSType function for operator from library */ +MX_INT_RET _opCallInferSType(mxnet::ext::inferSType_t inferSType, const char* const* keys, + const char* const* vals, int num, + int* instypes, int num_in, int* outstypes, int num_out) { + // create map of attributes from list + std::unordered_map attrs; + for (int i = 0; i < num; i++) { + attrs[std::string(keys[i])] = std::string(vals[i]); + } + + // create a vector of types for inputs + std::vector in_stypes(num_in); + for (int i = 0; i < num_in; i++) { + in_stypes[i] = instypes[i]; + } + + // create a vector of types for outputs + std::vector out_stypes(num_out, -1); + + int retval = inferSType(attrs, &in_stypes, &out_stypes); + + if (!retval) + return retval; + + // copy modified input storage types + for (int i = 0; i < num_in; i++) { + instypes[i] = in_stypes[i]; + } + // copy output storage types + for (int i = 0; i < num_out; i++) { + outstypes[i] = out_stypes[i]; + } + + return retval; +} + +/*! \brief returns status of calling Forward/Backward function for operator from library */ +MX_INT_RET _opCallFCompute(mxnet::ext::fcomp_t fcomp, const char* const* keys, + const char* const* vals, + int num, const int64_t** inshapes, int* indims, void** indata, + int* intypes, size_t* inIDs, const char** indev_type, int* indev_id, + int num_in, const int64_t** outshapes, int* outdims, void** outdata, + int* outtypes, size_t* outIDs, const char** outdev_type, + int* outdev_id, int num_out, mxnet::ext::xpu_malloc_t cpu_malloc, + void* cpu_alloc, + mxnet::ext::xpu_malloc_t gpu_malloc, void* gpu_alloc, + void* cuda_stream, + mxnet::ext::sparse_malloc_t sparse_malloc, void* sparse_alloc, + int* instypes, int* outstypes, void** in_indices, void** out_indices, + void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes, + void* rng_cpu_states, void* rng_gpu_states) { + // create map of attributes 
from list + std::unordered_map attrs; + for (int i = 0; i < num; i++) { + attrs[std::string(keys[i])] = std::string(vals[i]); + } + + // create a vector of tensors for inputs + std::vector inputs(num_in); + // create a vector for sparse inputs + std::vector in_sparse(num_in); + + for (int i = 0; i < num_in; i++) { + // Dense representation. + if (instypes[i] == 0) { + inputs[i].setTensor(indata[i], (mxnet::ext::MXDType)intypes[i], inshapes[i], indims[i], + inIDs[i], mxnet::ext::MXContext(indev_type[i], indev_id[i]), + mxnet::ext::kDefaultStorage); + } else { + // Sparse representation. + mxnet::ext::MXStorageType type; + if (instypes[i] == 1) { + type = mxnet::ext::kRowSparseStorage; + in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); + } else { + type = mxnet::ext::kCSRStorage; + in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], + in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); + } + inputs[i].setTensor(reinterpret_cast(&in_sparse[i]), (mxnet::ext::MXDType)intypes[i], + inshapes[i], indims[i], inIDs[i], + mxnet::ext::MXContext(indev_type[i], indev_id[i]), type); + } + } + + // create a vector of tensors for outputs + std::vector outputs(num_out); + std::vector out_sparse(num_out); + + for (int i = 0; i < num_out; i++) { + // Dense representation. + if (outstypes[i] == 0) { + outputs[i].setTensor(outdata[i], (mxnet::ext::MXDType)outtypes[i], outshapes[i], outdims[i], + outIDs[i], mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), + mxnet::ext::kDefaultStorage); + } else { + // Sparse representation. 
+ mxnet::ext::MXStorageType type; + if (outstypes[i] == 1) { + type = mxnet::ext::kRowSparseStorage; + out_sparse[i].set(outdata[i], outshapes[i], outdims[i], + out_indices[i], out_indices_shapes[i]); + } else { + type = mxnet::ext::kCSRStorage; + out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], + out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); + } + outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), + (mxnet::ext::MXDType)outtypes[i], + outshapes[i], outdims[i], outIDs[i], + mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), type); + } + } + + mxnet::ext::OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, + cuda_stream, sparse_malloc, sparse_alloc, + rng_cpu_states, rng_gpu_states); + return fcomp(attrs, &inputs, &outputs, res); +} + +/*! \brief returns status of calling mutateInputs function for operator from library */ +MX_INT_RET _opCallMutateInputs(mxnet::ext::mutateInputs_t mutate, const char* const* keys, + const char* const* vals, int num, + int** mutate_indices, int* indices_size) { + // create map of attributes from list + std::unordered_map attrs; + for (int i = 0; i < num; i++) { + attrs[std::string(keys[i])] = std::string(vals[i]); + } + + // create a vector of mutate input indices + std::vector mut_ind; + + int retval = mutate(attrs, &mut_ind); + if (!retval) + return retval; + + // output the input indices + *indices_size = mut_ind.size(); + *mutate_indices = static_cast(malloc (*indices_size * sizeof(int))); + for (int i = 0; i < *indices_size; i++) { + (*mutate_indices)[i] = mut_ind[i]; + } + + return retval; +} + +/*! 
\brief returns status of calling createStatefulOp function for operator from library */ +MX_INT_RET _opCallCreateOpState(mxnet::ext::createOpState_t create_op, const char* const* keys, + const char* const* vals, int num, + void** state_op) { + // create map of attributes from list + std::unordered_map attrs; + for (int i = 0; i < num; i++) { + attrs[std::string(keys[i])] = std::string(vals[i]); + } + + // void pointer to hold custom state op instance created in custom library + // eventually state_op pointer is populated by instance from custom library + mxnet::ext::CustomStatefulOp** op_ptr = + reinterpret_cast(state_op); + return create_op(attrs, op_ptr); +} + +/*! \brief returns status of calling Stateful Forward/Backward for operator from library */ +MX_INT_RET _opCallFStatefulCompute(int is_forward, void* state_op, const int64_t** inshapes, + int* indims, void** indata, int* intypes, size_t* inIDs, + const char** indev_type, int* indev_id, int num_in, + const int64_t** outshapes, int* outdims, void** outdata, + int* outtypes, size_t* outIDs, const char** outdev_type, + int* outdev_id, int num_out, + mxnet::ext::xpu_malloc_t cpu_malloc, + void* cpu_alloc, mxnet::ext::xpu_malloc_t gpu_malloc, + void* gpu_alloc, + void* stream, mxnet::ext::sparse_malloc_t sparse_malloc, + void* sparse_alloc, int* instypes, int* outstypes, + void** in_indices, void** out_indices, void** in_indptr, + void** out_indptr, int64_t* in_indices_shapes, + int64_t* out_indices_shapes, int64_t* in_indptr_shapes, + int64_t* out_indptr_shapes, + void* rng_cpu_states, void* rng_gpu_states) { + // create a vector of tensors for inputs + std::vector inputs(num_in); + // create a vector for sparse inputs + std::vector in_sparse(num_in); + + for (int i = 0; i < num_in; i++) { + if (instypes[i] == 0) { + // Dense representation. 
+ inputs[i].setTensor(indata[i], (mxnet::ext::MXDType)intypes[i], inshapes[i], indims[i], + inIDs[i], mxnet::ext::MXContext(indev_type[i], indev_id[i]), + mxnet::ext::kDefaultStorage); + } else { + // Sparse representation. + mxnet::ext::MXStorageType type; + if (instypes[i] == 1) { + type = mxnet::ext::kRowSparseStorage; + in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); + } else { + type = mxnet::ext::kCSRStorage; + in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], + in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); + } + inputs[i].setTensor(reinterpret_cast(&in_sparse[i]), (mxnet::ext::MXDType)intypes[i], + inshapes[i], indims[i], inIDs[i], + mxnet::ext::MXContext(indev_type[i], indev_id[i]), type); + } + } + + // create a vector of tensors for outputs + std::vector outputs(num_out); + // create a vector for sparse outputs + std::vector out_sparse(num_out); + + for (int i = 0; i < num_out; i++) { + if (outstypes[i] == 0) { + // Dense representation. + outputs[i].setTensor(outdata[i], (mxnet::ext::MXDType)outtypes[i], outshapes[i], outdims[i], + outIDs[i], mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), + mxnet::ext::kDefaultStorage); + } else { + // Sparse representation. 
+ mxnet::ext::MXStorageType type; + if (outstypes[i] == 1) { + type = mxnet::ext::kRowSparseStorage; + out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], + out_indices_shapes[i]); + } else { + type = mxnet::ext::kCSRStorage; + out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], + out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); + } + outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), + (mxnet::ext::MXDType)outtypes[i], + outshapes[i], outdims[i], outIDs[i], + mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), type); + } + } + + mxnet::ext::OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, + stream, sparse_malloc, sparse_alloc, rng_cpu_states, rng_gpu_states); + + mxnet::ext::CustomStatefulOp* op_ptr = + reinterpret_cast(state_op); + if (is_forward) { + return op_ptr->Forward(&inputs, &outputs, res); + } + return op_ptr->Backward(&inputs, &outputs, res); +} + +/*! \brief returns number of partitioners registered in this library */ +MX_INT_RET _partRegSize() { + return mxnet::ext::Registry::get()->size(); +} + +/* returns number of strategies registered for partitioner + * at specified index */ +MX_INT_RET _partRegGetCount(int idx, const char** name) { + mxnet::ext::CustomPartitioner part = + mxnet::ext::Registry::get()->get(idx); + *name = part.name; + return part.strategies.size(); +} + +/*! 
\brief returns partitioner registration at specified index */ +MX_VOID_RET _partRegGet(int part_idx, int stg_idx, const char** strategy, + mxnet::ext::supportedOps_t* supportedOps, + mxnet::ext::createSelector_t* createSelector, + mxnet::ext::reviewSubgraph_t* reviewSubgraph, const char** op_name) { + mxnet::ext::CustomPartitioner part = + mxnet::ext::Registry::get()->get(part_idx); + *strategy = part.strategies[stg_idx]; + *op_name = part.op_names[stg_idx]; + *supportedOps = part.getSupportedOps(stg_idx); + *createSelector = part.getCreateSelector(stg_idx); + *reviewSubgraph = part.getReviewSubgraph(stg_idx); +} + +/*! \brief returns status of calling supported ops function from library */ +MX_INT_RET _partCallSupportedOps(mxnet::ext::supportedOps_t supportedOps, const char *json, + int num_ids, int *ids, const char* const* opt_keys, + const char* const* opt_vals, int num_opts) { + mxnet::ext::Graph *graph = mxnet::ext::Graph::fromString(json); + // create map of options from list + std::unordered_map opts; + for (int i = 0; i < num_opts; i++) + opts[std::string(opt_keys[i])] = std::string(opt_vals[i]); + + // create array of subgraph IDs for operator support + std::vector _ids(num_ids, -2); + // call user's supportedOps function + mxnet::ext::MXReturnValue retval = supportedOps(graph, &_ids, opts); + if (!retval) return retval; + + // copy bools in ids to ints + for (int i = 0; i < num_ids; i++) + ids[i] = _ids[i]; + + return retval; +} + +/*! 
\brief returns status of calling create selector function from library */ +MX_INT_RET _partCallCreateSelector(mxnet::ext::createSelector_t createSelector, const char *json, + void** selector, const char* const* opt_keys, + const char* const* opt_vals, int num_opts) { + mxnet::ext::Graph *graph = mxnet::ext::Graph::fromString(json); + // create map of options from list + std::unordered_map opts; + for (int i = 0; i < num_opts; i++) + opts[std::string(opt_keys[i])] = std::string(opt_vals[i]); + + // void pointer to hold selector instance created in custom library + // eventually pointer is populated by instance from custom library + mxnet::ext::CustomOpSelector** sel_ptr = + reinterpret_cast(selector); + + // call user's createSelector function + return createSelector(graph, sel_ptr, opts); +} + +/*! \brief returns status of calling select function from library */ +MX_VOID_RET _partCallSelect(void* sel_inst, int nodeID, int* selected) { + mxnet::ext::CustomOpSelector* sel_ptr = + reinterpret_cast(sel_inst); + *selected = sel_ptr->Select(nodeID); +} + +/*! \brief returns status of calling select input function from library */ +MX_VOID_RET _partCallSelectInput(void* sel_inst, int nodeID, + int input_nodeID, int* selected) { + mxnet::ext::CustomOpSelector* sel_ptr = + reinterpret_cast(sel_inst); + *selected = sel_ptr->SelectInput(nodeID, input_nodeID); +} + +/*! \brief returns status of calling select output function from library */ +MX_VOID_RET _partCallSelectOutput(void* sel_inst, int nodeID, + int output_nodeID, int* selected) { + mxnet::ext::CustomOpSelector* sel_ptr = + reinterpret_cast(sel_inst); + *selected = sel_ptr->SelectOutput(nodeID, output_nodeID); +} + +/*! 
\brief returns status of calling filter function from library */ +MX_VOID_RET _partCallFilter(void* sel_inst, int* candidates, int num_candidates, + int** keep, int* num_keep) { + mxnet::ext::CustomOpSelector* sel_ptr = + reinterpret_cast(sel_inst); + std::vector candidates_(num_candidates); + for (int i=0; i < num_candidates; i++) { + candidates_[i] = candidates[i]; + } + std::vector keep_; + + sel_ptr->Filter(candidates_, &keep_); + + *num_keep = keep_.size(); + *keep = static_cast(malloc(keep_.size() * sizeof(int))); + for (unsigned i=0; i < keep_.size(); i++) + (*keep)[i] = keep_[i]; +} + +/*! \brief returns status of calling reset selector function from library */ +MX_VOID_RET _partCallReset(void* sel_inst) { + mxnet::ext::CustomOpSelector* sel_ptr = + reinterpret_cast(sel_inst); + sel_ptr->Reset(); +} + +/*! \brief returns status of calling review subgraph function from library */ +MX_INT_RET _partCallReviewSubgraph(mxnet::ext::reviewSubgraph_t reviewSubgraph, const char *json, + int subgraph_id, int *accept, const char* const* opt_keys, + const char* const* opt_vals, int num_opts, + char*** attr_keys, char*** attr_vals, int *num_attrs, + const char* const* arg_names, int num_args, + void* const* arg_data, const int64_t* const* arg_shapes, + const int* arg_dims, const int* arg_types, + const size_t* arg_IDs, const char* const* arg_dev_type, + const int* arg_dev_id, + const char* const* aux_names, int num_aux, + void* const* aux_data, const int64_t* const* aux_shapes, + const int* aux_dims, const int* aux_types, + const size_t* aux_IDs, const char* const* aux_dev_type, + const int* aux_dev_id) { + mxnet::ext::Graph *subgraph = mxnet::ext::Graph::fromString(json); + bool accept_bool = false; + // create map of attributes from list + std::unordered_map opts; + for (int i = 0; i < num_opts; i++) + opts[std::string(opt_keys[i])] = std::string(opt_vals[i]); + + // create a map of named tensors for args + std::unordered_map args; + for (int i = 0; i < num_args; i++) 
{ + std::vector shapes; + for (int j = 0; j < arg_dims[i]; j++) + shapes.push_back(arg_shapes[i][j]); + + mxnet::ext::MXTensor tensor(arg_data[i], shapes, (mxnet::ext::MXDType)arg_types[i], + arg_IDs[i], mxnet::ext::MXContext(arg_dev_type[i], arg_dev_id[i])); + args[arg_names[i]] = tensor; + } + // create a map of named tensors for aux + std::unordered_map aux; + for (int i = 0; i < num_aux; i++) { + std::vector shapes; + for (int j = 0; j < aux_dims[i]; j++) + shapes.push_back(aux_shapes[i][j]); + + mxnet::ext::MXTensor tensor(aux_data[i], shapes, (mxnet::ext::MXDType)aux_types[i], + aux_IDs[i], mxnet::ext::MXContext(aux_dev_type[i], + aux_dev_id[i])); + aux[aux_names[i]] = tensor; + } + + subgraph->_setParams(&args, &aux); + mxnet::ext::MXReturnValue retval = reviewSubgraph(subgraph, subgraph_id, &accept_bool, + opts); + if (!retval) return retval; + + *accept = accept_bool; + + if (subgraph->attrs.size() > 0) { + *num_attrs = subgraph->attrs.size(); + // allocate space for attributes + *attr_keys = static_cast(malloc (*num_attrs * sizeof(char*))); + *attr_vals = static_cast(malloc (*num_attrs * sizeof(char*))); + + // copy attributes + int i = 0; + for (auto kv : subgraph->attrs) { + (*attr_keys)[i] = static_cast(malloc ((kv.first.size()+1) * sizeof(char))); + std::string val = kv.second.dump(); // convert JsonVal back to string + (*attr_vals)[i] = static_cast(malloc ((val.size()+1) * sizeof(char))); + snprintf((*attr_keys)[i], kv.first.size()+1, "%s", kv.first.c_str()); + snprintf((*attr_vals)[i], val.size()+1, "%s", val.c_str()); + i++; + } + } + + return retval; +} + +/*! \brief returns number of graph passes registered in this library */ +MX_INT_RET _passRegSize() { + return mxnet::ext::Registry::get()->size(); +} + +/*! 
\brief returns pass registration at specified index */ +MX_VOID_RET _passRegGet(int pass_idx, mxnet::ext::graphPass_t* graphPass, + const char** pass_name) { + mxnet::ext::CustomPass pass = + mxnet::ext::Registry::get()->get(pass_idx); + *graphPass = pass.pass; + *pass_name = pass.name; +} + +/*! \brief returns status of calling graph pass function from library */ +MX_INT_RET _passCallGraphPass(mxnet::ext::graphPass_t graphPass, const char *json, + char** out_graph, const char* const* opt_keys, + const char* const* opt_vals, int num_opts, + const char* pass_name, const char* const* arg_names, int num_args, + void* const* arg_data, const int64_t* const* arg_shapes, + const int* arg_dims, const int* arg_types, + const size_t* arg_IDs, const char* const* arg_dev_type, + const int* arg_dev_id, const char* const* aux_names, int num_aux, + void* const* aux_data, const int64_t* const* aux_shapes, + const int* aux_dims, const int* aux_types, + const size_t* aux_IDs, const char* const* aux_dev_type, + const int* aux_dev_id, mxnet::ext::nd_malloc_t nd_malloc, + const void* nd_alloc) { + mxnet::ext::Graph *graph = mxnet::ext::Graph::fromString(json); + // create map of attributes from list + std::unordered_map opts; + for (int i = 0; i < num_opts; i++) + opts[std::string(opt_keys[i])] = std::string(opt_vals[i]); + + // create a map of named tensors for args + std::unordered_map args; + for (int i = 0; i < num_args; i++) { + std::vector shapes; + for (int j = 0; j < arg_dims[i]; j++) + shapes.push_back(arg_shapes[i][j]); + + mxnet::ext::MXTensor tensor(arg_data[i], shapes, (mxnet::ext::MXDType)arg_types[i], + arg_IDs[i], mxnet::ext::MXContext(arg_dev_type[i], + arg_dev_id[i])); + args[arg_names[i]] = tensor; + } + // create a map of named tensors for aux + std::unordered_map aux; + for (int i = 0; i < num_aux; i++) { + std::vector shapes; + for (int j = 0; j < aux_dims[i]; j++) + shapes.push_back(aux_shapes[i][j]); + + mxnet::ext::MXTensor tensor(aux_data[i], shapes, 
(mxnet::ext::MXDType)aux_types[i], + aux_IDs[i], mxnet::ext::MXContext(aux_dev_type[i], + aux_dev_id[i])); + aux[aux_names[i]] = tensor; + } + + std::unordered_map new_args, new_aux; + mxnet::ext::PassResource res(&new_args, &new_aux, nd_malloc, nd_alloc); + graph->_setParams(&args, &aux); + graph->_setPassResource(&res); + mxnet::ext::MXReturnValue retval = graphPass(graph, opts); + if (!retval) return retval; + + std::string *tmp = new std::string(graph->toString()); + *out_graph = const_cast(tmp->c_str()); + return retval; +} + +/*! + * \brief Checks if the MXNet version is supported by the library. + * If supported, initializes the library. + * \param version MXNet version number passed to library and defined as: + * MXNET_VERSION = (MXNET_MAJOR*10000 + MXNET_MINOR*100 + MXNET_PATCH) + * \return Non-zero value on error i.e. library incompatible with passed MXNet version + */ +#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__) +__declspec(dllexport) mxnet::ext::MXReturnValue __cdecl +#else +mxnet::ext::MXReturnValue +#endif +initialize(int version); + +MX_INT_RET _msgSize() { + return mxnet::ext::MXerrorMsgs::get()->size(); +} + +/*! \brief returns operator registration at specified index */ +MX_VOID_RET _msgGet(int idx, const char** msg) { + *msg = mxnet::ext::MXerrorMsgs::get()->get(idx)->c_str(); +}