From 9dfac79d5acb9e9a195c86aae919a6acaf3de4bc Mon Sep 17 00:00:00 2001
From: Sam Skalicky
Date: Mon, 14 Sep 2020 22:57:05 -0700
Subject: [PATCH] [1.x] Backport Fix for duplicate subgraph inputs/outputs (#16131) (#19112)

* Fix for duplicate subgraph inputs/outputs (#16131)

* fix for duplicate inputs
* fixed error
* fixed whitespace
* Remove duplicate outputs from subgraphs
* changed subgraph to create map of outputs
* added static_cast
* changed map to vector
* sanity fix
* sanity2
* updated backends with new connectSubgraphOutputs API
* fixed map creation logic
* added updates for reattach function
* creating node only if it is not an input to subgraph
* creating object based on var_name only
* updating ConnectSubgraphOutputs for mkldnn_elemwisemul_post_quantize_property.h
* add debug prints to debug error in CI
* remove prints
* added prints to debug in the CI
* revert changes
* reverted changes
* deduplicated inputs to subgraph
* deduplicated subgraph inputs
* simplified inputs
* cleaned up
* deduplicate outputs
* cleaned up
* added deduplication to subgraph node outputs
* fixed prev compare
* fixed issue with inputs and added test
* fixed whitespace, removed prints

Co-authored-by: Sam Skalicky
Co-authored-by: Ubuntu
Co-authored-by: Ubuntu
Co-authored-by: Manu Seth <22492939+mseth10@users.noreply.github.com>
Co-authored-by: Ubuntu

* added flag to enable dedup on demand
* fixed dedup logic
* improved dedup logic
* fixed sanity
* propagated option
* check option in custom subgraph prop
* fixed options map
* fixed missing
* added dedup to subgraph_prop base class for testing
* added test for dedup
* added comments

Co-authored-by: Sam Skalicky
Co-authored-by: Ubuntu
Co-authored-by: Ubuntu
Co-authored-by: Manu Seth <22492939+mseth10@users.noreply.github.com>
Co-authored-by: Ubuntu
---
 .../extensions/lib_subgraph/test_subgraph.py |  19 ++---
 src/c_api/c_api_symbolic.cc                  |   5 ++
 src/operator/subgraph/build_subgraph.cc      |  70 +++++++++++++++----
 .../partitioner/custom_subgraph_property.h   |   7 +-
 src/operator/subgraph/subgraph_property.h    |  28 +++++++-
 tests/python/unittest/test_subgraph_op.py    |  49 ++++++++++++-
 6 files changed, 146 insertions(+), 32 deletions(-)

diff --git a/example/extensions/lib_subgraph/test_subgraph.py b/example/extensions/lib_subgraph/test_subgraph.py
index 5294e1c209a3..a8b6690af80c 100644
--- a/example/extensions/lib_subgraph/test_subgraph.py
+++ b/example/extensions/lib_subgraph/test_subgraph.py
@@ -63,7 +63,8 @@ def test(backend):
     # with propogating shapes/types
     print('-------------------------------')
     print('Testing %s partitioning with shapes/types' % backend)
-    mysym2 = sym.optimize_for(backend,args)
+    print(sym.tojson())
+    mysym2 = sym.optimize_for(backend, args, dedup_subgraph=True)
     print(mysym2.tojson())
     exe2 = mysym2.bind(ctx=mx.cpu(), args=args)
     out2 = exe2.forward()
@@ -72,7 +73,7 @@ def test(backend):
     # with propogating shapes/types, rejecting subgraph
     print('-------------------------------')
     print('Testing %s partitioning with shapes/types - rejecting subgraph' % backend)
-    mysym2 = sym.optimize_for(backend, args, reject=True)
+    mysym2 = sym.optimize_for(backend, args, reject=True, dedup_subgraph=True)
     exe2 = mysym2.bind(ctx=mx.cpu(), args=args)
     out2 = exe2.forward()
     print(out2)
@@ -80,7 +81,7 @@ def test(backend):
     # without propogating shapes/types
     print('-------------------------------')
     print('Testing %s partitioning without shapes/types' % backend)
-    mysym3 = sym.optimize_for(backend, myOpt='yello')
+    mysym3 = sym.optimize_for(backend, myOpt='yello', dedup_subgraph=True)
     exe3 = mysym3.bind(ctx=mx.cpu(), args=args)
     out3 = exe3.forward()
     print(out3)
@@ -91,7 +92,7 @@ def test(backend):
     inputs = [a,b]
     sym_block = nn.SymbolBlock(sym, inputs)
     sym_block.initialize()
-    sym_block.hybridize(backend=backend)
+    sym_block.hybridize(backend=backend, backend_opts={'dedup_subgraph':True})
     out2 = sym_block(mx.nd.ones((3,2)),mx.nd.ones((3,2)))
     print(out2)
@@ -101,13 +102,15 @@ def test(backend):
     inputs = [a,b]
     sym_block2 = nn.SymbolBlock(sym, inputs)
     sym_block2.initialize()
-    sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend=backend)
+    sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend=backend,
+                            backend_opts={'dedup_subgraph':True})
     sym_block2.export('partitioned')
 
     # Test with additional input to subgraph op
     print('-------------------------------')
     print('Testing %s Gluon Hybridize partitioning with extra input' % backend)
-    sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend="addInputPass", clear=False)
+    sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend="addInputPass",
+                            clear=False, backend_opts={'dedup_subgraph':True})
     out3 = sym_block2(mx.nd.ones((3,2)),mx.nd.ones((3,2)))
     print(out3)
@@ -125,7 +128,7 @@ def test(backend):
     # with propogating shapes/types
     print('-------------------------------')
     print('Testing %s partitioning with shapes/types' % backend)
-    mysym6 = sym2.optimize_for(backend, args, reqArgs=True)
+    mysym6 = sym2.optimize_for(backend, args, reqArgs=True, dedup_subgraph=True)
     print(mysym6.tojson())
     exe6 = mysym6.bind(ctx=mx.cpu(), args=args)
     out6 = exe6.forward()
@@ -134,7 +137,7 @@ def test(backend):
     # without propogating shapes/types
     print('-------------------------------')
     print('Testing %s partitioning without shapes/types' % backend)
-    mysym7 = sym2.optimize_for(backend, reqArgs=True)
+    mysym7 = sym2.optimize_for(backend, reqArgs=True, dedup_subgraph=True)
     exe7 = mysym7.bind(ctx=mx.cpu(), args=args)
     out7 = exe7.forward()
     print(out7)
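The test changes above exercise the new option through both front ends: Symbol.optimize_for takes it as a plain keyword argument, while Gluon's hybridize/optimize_for pass it inside backend_opts. A minimal usage sketch against the MXNet 1.x API (the backend name 'myProp' is a placeholder for whatever partitioner your library registers):

import mxnet as mx

a = mx.sym.Variable('a')
b = mx.sym.Variable('b')
# repeated use of the same inputs/outputs, the case dedup targets
sym = mx.sym.exp(a + b) + mx.sym.exp(a + b)

args = {'a': mx.nd.ones((3, 2)), 'b': mx.nd.ones((3, 2))}
# Symbol API: extra keyword arguments become string-valued backend options
part = sym.optimize_for('myProp', args, dedup_subgraph=True)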
diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc
index 32c5b1c0de6f..ccc0547e8f7d 100644
--- a/src/c_api/c_api_symbolic.cc
+++ b/src/c_api/c_api_symbolic.cc
@@ -1484,6 +1484,11 @@ int MXOptimizeForBackend(SymbolHandle sym_handle,
   for (mx_uint i = 0; i < num_options; ++i)
     options_map.emplace(keys[i], vals[i]);
 
+  // set dedup option as attribute on graph to enable dedup during partitioning
+  if (options_map.count("dedup_subgraph") > 0 &&
+      options_map.at("dedup_subgraph").compare("True") == 0)
+    g.attrs["dedup_subgraph"] = std::make_shared<nnvm::any>(std::string("True"));
+
   if (mxnet::op::SubgraphBackendRegistry::Get()->backend_map_.count(backend_name) > 0) {
     // use subgraph backend
     const auto backend = mxnet::op::SubgraphBackendRegistry
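Note that MXOptimizeForBackend receives every option as a C string and compares the value against the literal "True", so the flag only takes effect when the front end stringifies Python's True accordingly. A simplified sketch of that conversion, assuming (as in the 1.x Symbol.optimize_for implementation) that option values pass through str():

kwargs = {'dedup_subgraph': True, 'myOpt': 'yello'}
keys = list(kwargs.keys())
vals = [str(v) for v in kwargs.values()]   # Python True -> "True", matching the C++ compare
assert vals[keys.index('dedup_subgraph')] == 'True'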
diff --git a/src/operator/subgraph/build_subgraph.cc b/src/operator/subgraph/build_subgraph.cc
index 7cf9671b92b4..ba6ba03129c8 100644
--- a/src/operator/subgraph/build_subgraph.cc
+++ b/src/operator/subgraph/build_subgraph.cc
@@ -537,33 +537,49 @@ void FindOutputEntries(nnvm::Graph* g,
  */
 void CutGraphInputs(const std::vector<nnvm::NodeEntry*> &input_entries,
                     std::vector<nnvm::NodeEntry> *orig_entries,
-                    const bool skip_var = false) {
+                    std::vector<nnvm::NodeEntry> *unique_orig_entries,
+                    std::vector<nnvm::NodeEntry*> *unique_input_entries,
+                    const bool skip_var = false,
+                    const bool dedup = false) {
   orig_entries->resize(input_entries.size());
   // map for creating unique var nodes for deduplicating entries from the same node
+  std::unordered_map<std::string, nnvm::NodeEntry> name_map;
   std::unordered_map<std::string, int> name_count_map;
+
   for (size_t i = 0; i < input_entries.size(); ++i) {
     nnvm::NodeEntry *e = input_entries[i];
     // If the node is a variable itself, we may want to skip the node.
     if (e->node->is_variable() && skip_var) {
       continue;
     }
-
+    // save all original entries
     orig_entries->at(i) = *e;
+    // get unique name for this entry
     nnvm::Symbol sym;
     sym.outputs.push_back(*e);
     const auto output_names = sym.ListOutputNames();
     CHECK_EQ(output_names.size(), 1U);
     const std::string& var_name = output_names[0];
-    auto it = name_count_map.find(var_name);
-    if (name_count_map.end() == it) {
+    // check if this entry is a duplicate
+    if (name_count_map.count(var_name) == 0) {
+      // first use of this node as input to subgraph
       name_count_map.emplace(var_name, 0);
+      unique_orig_entries->push_back(*e);
+      unique_input_entries->push_back(e);
+      nnvm::ObjectPtr n = nnvm::CreateVariableNode(var_name + std::to_string(0));
+      name_map.emplace(var_name, nnvm::NodeEntry{n, 0, 0});
     } else {
-      ++(it->second);
+      // other use of same node as input to subgraph
+      name_count_map[var_name]++;
     }
-    nnvm::ObjectPtr n = nnvm::CreateVariableNode(
-        var_name + std::to_string(name_count_map[var_name]));
-    *e = nnvm::NodeEntry{n, 0, 0};
+
+    if (dedup) {
+      *e = name_map[var_name];
+    } else {
+      nnvm::ObjectPtr n = nnvm::CreateVariableNode(
+          var_name + std::to_string(name_count_map[var_name]));
+      *e = nnvm::NodeEntry{n, 0, 0};
+    }
   }
 }
 
@@ -593,10 +609,14 @@ void CreateSubgraphNode(nnvm::Graph* g,
 #if DEBUG_SUBGRAPH
   LOG(INFO) << "Searching for input entries...";
 #endif
-  std::vector<nnvm::NodeEntry*> input_entries;
+  bool dedup_subgraph = g->HasAttr("dedup_subgraph");
+  std::vector<nnvm::NodeEntry*> input_entries;  // nodes that produce inputs to subgraph nodes
   FindInputEntries(*g, simple_nodes, subgraph_nodes, *entry_top_order_map, &input_entries);
-  std::vector<nnvm::NodeEntry> orig_input_entries;
-  CutGraphInputs(input_entries, &orig_input_entries, false);
+  std::vector<nnvm::NodeEntry> orig_input_entries;    // original input entries (dupes)
+  std::vector<nnvm::NodeEntry> unique_orig_entries;   // unique original input entries
+  std::vector<nnvm::NodeEntry*> unique_input_entries; // unique modified subgraph inputs
+  CutGraphInputs(input_entries, &orig_input_entries, &unique_orig_entries,
+                 &unique_input_entries, false, dedup_subgraph);
 #if DEBUG_SUBGRAPH
   PrintNodeEntries(input_entries);
   LOG(INFO) << "Searching for output entries...";
@@ -605,20 +625,42 @@ void CreateSubgraphNode(nnvm::Graph* g,
   FindOutputEntries(g, simple_nodes, subgraph_nodes, *entry_top_order_map, &output_entries);
 
   // Create a subgraph for the subgraph node
+  // entries are in topological order, with duplicates being neighbors
   nnvm::Symbol sym;
+  size_t idx = 0;
+  nnvm::NodeEntryEqual node_equal;
   sym.outputs.resize(output_entries.size());
   for (size_t i = 0; i < output_entries.size(); ++i) {
-    sym.outputs[i] = *output_entries[i];
+    if (dedup_subgraph) {
+      if (i == 0) {  // add first entry
+        sym.outputs[idx] = *output_entries[i];
+      } else if (!node_equal(sym.outputs[idx], *output_entries[i])) {  // compare to see if diff
+        // add new entries
+        idx++;
+        sym.outputs[idx] = *output_entries[i];
+      }  // else skip over dupe entries
+    } else {
+      sym.outputs[i] = *output_entries[i];
+    }
   }
+  if (dedup_subgraph)
+    sym.outputs.resize(idx+1);
+
   const SubgraphPropertyPtr& subg_prop = g->GetAttr<SubgraphPropertyPtr>("subgraph_property");
-  subg_prop->InitSubgraphInputs(&input_entries, &orig_input_entries);
+  if (dedup_subgraph)
+    subg_prop->InitSubgraphInputs(&unique_input_entries, &unique_orig_entries);
+  else
+    subg_prop->InitSubgraphInputs(&input_entries, &orig_input_entries);
   nnvm::ObjectPtr n = subg_prop->CreateSubgraphNode(sym, subgraph_selector, subgraph_id);
   // CreateSubgraphNode returns NULL if subgraph property determines that subgraph is sub-optimal
   // In that case, subgraph node is not created and graph is not modified
   if (n) {
     // Connect the external nodes to the subgraph node.
     subg_prop->ConnectSubgraphOutputs(n, &output_entries);
-    subg_prop->ConnectSubgraphInputs(n, &input_entries, &orig_input_entries);
+    if (dedup_subgraph)
+      subg_prop->ConnectSubgraphInputs(n, &unique_input_entries, &unique_orig_entries);
+    else
+      subg_prop->ConnectSubgraphInputs(n, &input_entries, &orig_input_entries);
 
     const auto& indexed_graph = g->indexed_graph();
     for (size_t i = 0; i < n->inputs.size(); ++i) {
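To make the C++ above easier to follow, here is a pure-Python sketch (hypothetical helper names, not MXNet code) of the two deduplication steps CutGraphInputs and CreateSubgraphNode perform: duplicate input uses are mapped to one shared variable, and output entries, which arrive in topological order with duplicates adjacent, are collapsed by comparing neighbors:

def dedup_inputs(producer_names):
    """Return one variable per unique producer, plus a per-use mapping."""
    name_map = {}   # producer name -> shared variable name
    mapping = []
    for name in producer_names:
        if name not in name_map:
            name_map[name] = name + '0'   # mirrors CreateVariableNode(var_name + "0")
        mapping.append(name_map[name])
    return list(name_map.values()), mapping

def collapse_outputs(output_entries):
    """Collapse neighboring duplicates; return unique outputs and a per-use index."""
    unique, idx_per_use = [], []
    for e in output_entries:
        if not unique or unique[-1] != e:   # mirrors NodeEntryEqual on neighbors
            unique.append(e)
        idx_per_use.append(len(unique) - 1)
    return unique, idx_per_use

assert dedup_inputs(['a', 'a', 'b']) == (['a0', 'b0'], ['a0', 'a0', 'b0'])
assert collapse_outputs(['plus', 'plus', 'cos']) == (['plus', 'cos'], [0, 0, 1])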
diff --git a/src/operator/subgraph/partitioner/custom_subgraph_property.h b/src/operator/subgraph/partitioner/custom_subgraph_property.h
index f2d715b52432..49d5a8fb683f 100644
--- a/src/operator/subgraph/partitioner/custom_subgraph_property.h
+++ b/src/operator/subgraph/partitioner/custom_subgraph_property.h
@@ -319,9 +319,8 @@ class CustomSubgraphProperty: public SubgraphProperty {
     opt_vals_.clear();
     options_map_.clear();
     // store options in map in subgraph property to re-use later for reviewSubgraph
-    for (auto& kv : options_map) {
-      options_map_.push_back(kv);
-    }
+    options_map_.insert(options_map.begin(), options_map.end());
+
     // convert options_map_ to char* to pass to backend library
     for (auto& kv : options_map_) {
       opt_keys_.push_back(kv.first.c_str());
@@ -526,7 +525,7 @@ class CustomSubgraphProperty: public SubgraphProperty {
   mxnet::ext::opCallFree_t call_free_;
   std::unordered_map<std::string, int> supported_nodes;
   std::string subgraph_op_name;
-  std::vector<std::pair<std::string, std::string>> options_map_;
+  std::unordered_map<std::string, std::string> options_map_;
   std::vector<const char*> opt_keys_, opt_vals_;
   std::vector<std::string> in_arg_names, in_aux_names;
   NDArray **in_args_ptr;
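One plausible reading of the options_map_ change from a vector of pairs to an unordered_map: a repeated option key is now stored once, so the opt_keys_/opt_vals_ arrays handed to the backend library carry no duplicate keys. Note that unordered_map::insert keeps the first occurrence of a key; a small Python analogue of that semantics:

pairs = [('dedup_subgraph', 'True'), ('dedup_subgraph', 'False'), ('myOpt', 'yello')]
options = {}
for k, v in pairs:
    options.setdefault(k, v)   # like unordered_map::insert: first occurrence wins
assert options == {'dedup_subgraph': 'True', 'myOpt': 'yello'}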
diff --git a/src/operator/subgraph/subgraph_property.h b/src/operator/subgraph/subgraph_property.h
index 06a7fa5f67a2..a5f7f9441079 100644
--- a/src/operator/subgraph/subgraph_property.h
+++ b/src/operator/subgraph/subgraph_property.h
@@ -257,7 +257,7 @@ class SubgraphProperty {
     kAdjust,
   };
 
-  explicit SubgraphProperty(SgPropertyType type = kCreate) : type_(type) {}
+  explicit SubgraphProperty(SgPropertyType type = kCreate) : type_(type), dedup_subgraph(false) {}
 
   /*!
    * \brief The criteria of selecting the subgraph nodes.
@@ -268,7 +268,14 @@ class SubgraphProperty {
   }
 
   virtual void PrePartition(const nnvm::Graph& g,
-                            const std::unordered_map<std::string, std::string>& options_map) {}
+                            const std::unordered_map<std::string, std::string>& options_map) {
+    if (options_map.count("dedup_subgraph") > 0 &&
+        options_map.at("dedup_subgraph").compare("True") == 0) {
+      dedup_subgraph = true;
+    } else {
+      dedup_subgraph = false;
+    }
+  }
 
   virtual void PostPartition(const nnvm::Graph& g) {}
 
@@ -341,8 +348,22 @@ class SubgraphProperty {
    */
   virtual void ConnectSubgraphOutputs(const nnvm::ObjectPtr subgraph_node,
                                       std::vector<nnvm::NodeEntry*>* output_entries) const {
+    // Collapse output_entries pointing to same NodeEntry
+    // Outputs are ordered, duplicates are neighbors
+    nnvm::NodeEntryEqual node_equal;
+    nnvm::NodeEntry prevNodeEntry;
+    uint32_t idx = 0;
     for (size_t i = 0; i < output_entries->size(); ++i) {
-      *output_entries->at(i) = nnvm::NodeEntry{subgraph_node, static_cast<uint32_t>(i), 0};
+      if (dedup_subgraph) {
+        // increment the output idx for each unique output of the subgraph
+        if (i != 0 && !node_equal(prevNodeEntry, *output_entries->at(i)))
+          idx++;
+        prevNodeEntry = *output_entries->at(i);  // make a copy so we can compare before modifying
+        // change output entry to point to subgraph instead of original node
+        *output_entries->at(i) = nnvm::NodeEntry{subgraph_node, idx, 0};
+      } else {
+        *output_entries->at(i) = nnvm::NodeEntry{subgraph_node, static_cast<uint32_t>(i), 0};
+      }
     }
   }
   /*!
@@ -406,6 +427,7 @@ class SubgraphProperty {
  protected:
   SgPropertyType type_;
   std::unordered_map<std::string, std::shared_ptr<nnvm::any>> attrs_;
+  bool dedup_subgraph;
 };
 
 using SubgraphPropertyPtr = std::shared_ptr<SubgraphProperty>;
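The rewiring that ConnectSubgraphOutputs performs can be mirrored in a few lines of Python (a stand-in sketch, not nnvm): each external use is redirected to (subgraph_node, idx), and idx only advances when the entry differs from its neighbor, so every consumer of a duplicated output ends up sharing one subgraph output index:

def connect_outputs(subgraph_node, uses, dedup=True):
    rewired, idx, prev = [], 0, None
    for i, entry in enumerate(uses):          # uses are ordered; dupes are neighbors
        if dedup:
            if i != 0 and entry != prev:      # mirrors the NodeEntryEqual check
                idx += 1
            prev = entry
            rewired.append((subgraph_node, idx))
        else:
            rewired.append((subgraph_node, i))
    return rewired

assert connect_outputs('sg0', ['plus', 'plus', 'cos']) == [('sg0', 0), ('sg0', 0), ('sg0', 1)]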
diff --git a/tests/python/unittest/test_subgraph_op.py b/tests/python/unittest/test_subgraph_op.py
index 81665f20057c..79a104f39233 100644
--- a/tests/python/unittest/test_subgraph_op.py
+++ b/tests/python/unittest/test_subgraph_op.py
@@ -85,7 +85,19 @@ def network_structure_7():
     ret = ret1 + ret2
     return (ret, ['data'], [(1,)])
 
-def get_graphs():
+def network_structure_8():
+    # in this graph, two nodes in the subgraph consume the same input, and
+    # two nodes outside the subgraph consume a single output from the subgraph
+    data = mx.sym.Variable('data', shape=(1,))
+    sin1 = mx.sym.sin(data)
+    sin2 = mx.sym.sin(data)
+    plus = sin1 + sin2
+    ret1 = mx.sym.cos(plus)
+    ret2 = mx.sym.cos(plus)
+    ret = ret1 - ret2
+    return (ret, ['data'], [(1,)])
+
+def get_graphs():
     return [
             (network_structure_1(), ['Convolution']),
             (network_structure_2(), ['exp', 'sin', '_Plus', 'elemwise_add', '_plus']),
@@ -102,7 +114,8 @@ def get_graphs():
             (network_structure_6(), [mx.sym.sin.__name__]),
             (network_structure_6(), [mx.sym.Convolution.__name__]),
             (network_structure_6(), [mx.sym.sin.__name__, mx.sym.Convolution.__name__]),
-            (network_structure_7(), ['sin', 'elemwise_add', '_plus', '_Plus'])
+            (network_structure_7(), ['sin', 'elemwise_add', '_plus', '_Plus']),
+            (network_structure_8(), ['sin', 'elemwise_add'])
            ]
 
 def check_subgraph_exe1(sym, subgraph_backend, op_names):
@@ -157,7 +170,6 @@ def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None):
         check_call(_LIB.MXRemoveSubgraphPropertyOpNames(c_str(subgraph_backend)))
         del os.environ['MXNET_SUBGRAPH_BACKEND']
         return exe
-
     original_exec = get_executor(sym)
     partitioned_exec = get_executor(sym, subgraph_backend, op_names, original_exec)
     outputs1 = original_exec.outputs
@@ -378,6 +390,36 @@ def check_subgraph_exe9(sym, subgraph_backend, op_names):
     for i in range(len(outputs1)):
         assert_almost_equal((outputs1[i] - outputs2[i]).abs().sum().asnumpy(), np.zeros(shape=(1,)))
 
+def check_subgraph_exe10(sym, subgraph_backend, op_names):
+    """Call optimize_for to infer shapes, types and dtypes followed by graph partitioning and
+    dedup subgraph, then bind and compare results of the partitioned sym and the original sym."""
+    # bind
+    arg_shapes, _, aux_shapes = sym.infer_shape()
+    arg_names = sym.list_arguments()
+    aux_names = sym.list_auxiliary_states()
+    arg_dict = {name:mx.nd.random.uniform(shape=shape) for name,shape in zip(arg_names,arg_shapes)}
+    aux_dict = {name:mx.nd.random.uniform(shape=shape) for name,shape in zip(aux_names,aux_shapes)}
+    exe1 = sym.bind(ctx=mx.current_context(), args=arg_dict, aux_states=aux_dict, grad_req='null')
+    exe1.forward()
+
+    # infer shape/type before partition before bind
+    check_call(_LIB.MXSetSubgraphPropertyOpNamesV2(c_str(subgraph_backend), mx_uint(len(op_names)),
+                                                   c_str_array(op_names)))
+    print(sym.tojson())
+    part_sym = sym.optimize_for(subgraph_backend, arg_dict, aux_dict, dedup_subgraph=True)
+    print(part_sym.tojson())
+    check_call(_LIB.MXRemoveSubgraphPropertyOpNamesV2(c_str(subgraph_backend)))
+
+    exe2 = part_sym.bind(ctx=mx.current_context(), args=arg_dict, aux_states=aux_dict, grad_req='null')
+    exe2.forward()
+
+    # compare outputs
+    outputs1 = exe1.outputs
+    outputs2 = exe2.outputs
+    assert len(outputs1) == len(outputs2)
+    for i in range(len(outputs1)):
+        assert_almost_equal((outputs1[i] - outputs2[i]).abs().sum().asnumpy(), np.zeros(shape=(1,)))
+
 def check_subgraph(subgraph_backend):
     for sym, op_names in get_graphs():
         check_subgraph_exe1(sym[0], subgraph_backend, op_names)
@@ -391,6 +433,7 @@ def check_subgraph_backend_sym(subgraph_backend):
         check_subgraph_exe6(sym[0], subgraph_backend, op_names)
         check_subgraph_exe7(sym[0], subgraph_backend, op_names)
         check_subgraph_exe8(sym[0], subgraph_backend, op_names)
+        check_subgraph_exe10(sym[0], subgraph_backend, op_names)
 
 def check_subgraph_backend_gluon(subgraph_backend):
     for sym, op_names in get_graphs():
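For a concrete picture of what network_structure_8 and check_subgraph_exe10 exercise end to end, the sketch below builds the same duplicate-consumer graph and partitions it with dedup enabled. This assumes an MXNet 1.x build; which ops a backend captures depends on your setup, and the tests first inject op_names into the backend via MXSetSubgraphPropertyOpNamesV2, which is omitted here:

import mxnet as mx

data = mx.sym.Variable('data', shape=(1,))
plus = mx.sym.sin(data) + mx.sym.sin(data)   # two subgraph nodes consume the same input
ret = mx.sym.cos(plus) - mx.sym.cos(plus)    # two outside nodes consume the same output

arg_dict = {'data': mx.nd.random.uniform(shape=(1,))}
part = ret.optimize_for('default', arg_dict, dedup_subgraph=True)
# With dedup enabled, the subgraph node takes 'data' once and exposes 'plus'
# as a single output that both cos consumers share.
print(part.tojson())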