This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[1.x] Backport Fix for duplicate subgraph inputs/outputs (#16131) #19112

Merged (12 commits) on Sep 15, 2020
example/extensions/lib_subgraph/test_subgraph.py (19 changes: 11 additions & 8 deletions)

@@ -63,7 +63,8 @@ def test(backend):
     # with propogating shapes/types
     print('-------------------------------')
     print('Testing %s partitioning with shapes/types' % backend)
-    mysym2 = sym.optimize_for(backend,args)
+    print(sym.tojson())
+    mysym2 = sym.optimize_for(backend, args, dedup_subgraph=True)
     print(mysym2.tojson())
     exe2 = mysym2.bind(ctx=mx.cpu(), args=args)
     out2 = exe2.forward()
@@ -72,15 +73,15 @@ def test(backend):
     # with propogating shapes/types, rejecting subgraph
     print('-------------------------------')
     print('Testing %s partitioning with shapes/types - rejecting subgraph' % backend)
-    mysym2 = sym.optimize_for(backend, args, reject=True)
+    mysym2 = sym.optimize_for(backend, args, reject=True, dedup_subgraph=True)
     exe2 = mysym2.bind(ctx=mx.cpu(), args=args)
     out2 = exe2.forward()
     print(out2)
 
     # without propogating shapes/types
     print('-------------------------------')
     print('Testing %s partitioning without shapes/types' % backend)
-    mysym3 = sym.optimize_for(backend, myOpt='yello')
+    mysym3 = sym.optimize_for(backend, myOpt='yello', dedup_subgraph=True)
     exe3 = mysym3.bind(ctx=mx.cpu(), args=args)
     out3 = exe3.forward()
     print(out3)
@@ -91,7 +92,7 @@ def test(backend):
     inputs = [a,b]
     sym_block = nn.SymbolBlock(sym, inputs)
     sym_block.initialize()
-    sym_block.hybridize(backend=backend)
+    sym_block.hybridize(backend=backend, backend_opts={'dedup_subgraph':True})
     out2 = sym_block(mx.nd.ones((3,2)),mx.nd.ones((3,2)))
     print(out2)
 
@@ -101,13 +102,15 @@ def test(backend):
     inputs = [a,b]
     sym_block2 = nn.SymbolBlock(sym, inputs)
     sym_block2.initialize()
-    sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend=backend)
+    sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend=backend,
+                            backend_opts={'dedup_subgraph':True})
     sym_block2.export('partitioned')
 
     # Test with additional input to subgraph op
     print('-------------------------------')
     print('Testing %s Gluon Hybridize partitioning with extra input' % backend)
-    sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend="addInputPass", clear=False)
+    sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend="addInputPass",
+                            clear=False, backend_opts={'dedup_subgraph':True})
     out3 = sym_block2(mx.nd.ones((3,2)),mx.nd.ones((3,2)))
     print(out3)
 
@@ -125,7 +128,7 @@ def test(backend):
     # with propogating shapes/types
     print('-------------------------------')
     print('Testing %s partitioning with shapes/types' % backend)
-    mysym6 = sym2.optimize_for(backend, args, reqArgs=True)
+    mysym6 = sym2.optimize_for(backend, args, reqArgs=True, dedup_subgraph=True)
     print(mysym6.tojson())
     exe6 = mysym6.bind(ctx=mx.cpu(), args=args)
     out6 = exe6.forward()
@@ -134,7 +137,7 @@ def test(backend):
     # without propogating shapes/types
     print('-------------------------------')
     print('Testing %s partitioning without shapes/types' % backend)
-    mysym7 = sym2.optimize_for(backend, reqArgs=True)
+    mysym7 = sym2.optimize_for(backend, reqArgs=True, dedup_subgraph=True)
     exe7 = mysym7.bind(ctx=mx.cpu(), args=args)
     out7 = exe7.forward()
     print(out7)
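The change running through the example above is the new dedup_subgraph flag on optimize_for / hybridize. As a rough sketch of why it matters (assuming MXNet 1.x with a partitioning backend already registered; the backend name 'myProp' mirrors the one this example loads from its subgraph library and should be treated as an assumption):

    import mxnet as mx

    # the same variable feeds two ops that end up in one subgraph
    a = mx.sym.Variable('a', shape=(3, 2))
    sym = mx.sym.sin(a) + mx.sym.sin(a)
    args = {'a': mx.nd.ones((3, 2))}

    # without dedup the fused subgraph node lists 'a' once per consumer;
    # with dedup_subgraph=True the duplicates collapse to a single input
    part = sym.optimize_for('myProp', args, dedup_subgraph=True)
    print(part.tojson())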
src/c_api/c_api_symbolic.cc (5 changes: 5 additions & 0 deletions)

@@ -1484,6 +1484,11 @@ int MXOptimizeForBackend(SymbolHandle sym_handle,
   for (mx_uint i = 0; i < num_options; ++i)
     options_map.emplace(keys[i], vals[i]);
 
+  // set dedup option as attribute on graph to enable dedup during partitioning
+  if (options_map.count("dedup_subgraph") > 0 &&
+      options_map.at("dedup_subgraph").compare("True") == 0)
+    g.attrs["dedup_subgraph"] = std::make_shared<nnvm::any>(std::string("True"));
+
   if (mxnet::op::SubgraphBackendRegistry::Get()->backend_map_.count(backend_name) > 0) {
     // use subgraph backend
     const auto backend = mxnet::op::SubgraphBackendRegistry
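Note the comparison against the literal string "True": option values cross the C API as strings, so a Python-side dedup_subgraph=True plausibly arrives stringified. A minimal sketch of that conversion (illustrative only, not the actual frontend code):

    opts = {'dedup_subgraph': True}
    keys = list(opts.keys())
    vals = [str(v) for v in opts.values()]  # True -> "True", the value matched above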
src/operator/subgraph/build_subgraph.cc (70 changes: 56 additions & 14 deletions)

@@ -537,33 +537,49 @@ void FindOutputEntries(nnvm::Graph* g,
  */
 void CutGraphInputs(const std::vector<nnvm::NodeEntry*> &input_entries,
                     std::vector<nnvm::NodeEntry> *orig_entries,
-                    const bool skip_var = false) {
+                    std::vector<nnvm::NodeEntry> *unique_orig_entries,
+                    std::vector<nnvm::NodeEntry*> *unique_input_entries,
+                    const bool skip_var = false,
+                    const bool dedup = false) {
Review comment (on the new dedup parameters):
> This comment applies everywhere we set the defaults for the dedup flag.
>
> It seems like this is more a problem with MKLDNN. I understand they have done that for the master branch. Ideally, they fix it at their end for 1.8 as well.
>
> Failing that, we should go with default = true, except when compiled for MKLDNN and optimize_for() is called on CPU. All other paths would want to optimize away the duplicate input. This amounts to MKLDNN patching their fix.
>
> Given the timelines for MXNet 1.8, this may be too tight anyway. More importantly, the MKLDNN issues are fixed in master.
>
> I am ok going ahead with this as is.
   orig_entries->resize(input_entries.size());
+  // map for creating unique var nodes for deduplicating entries from the same node
+  std::unordered_map<std::string, nnvm::NodeEntry> name_map;
   std::unordered_map<std::string, int> name_count_map;
 
   for (size_t i = 0; i < input_entries.size(); ++i) {
     nnvm::NodeEntry *e = input_entries[i];
     // If the node is a variable itself, we may want to skip the node.
     if (e->node->is_variable() && skip_var) {
       continue;
     }
 
+    // save all original entries
     orig_entries->at(i) = *e;
+    // get unique name for this entry
     nnvm::Symbol sym;
     sym.outputs.push_back(*e);
     const auto output_names = sym.ListOutputNames();
     CHECK_EQ(output_names.size(), 1U);
     const std::string& var_name = output_names[0];
-    auto it = name_count_map.find(var_name);
-    if (name_count_map.end() == it) {
+    // check if this entry is a duplicate
+    if (name_count_map.count(var_name) == 0) {
+      // first use of this node as input to subgraph
       name_count_map.emplace(var_name, 0);
+      unique_orig_entries->push_back(*e);
+      unique_input_entries->push_back(e);
+      nnvm::ObjectPtr n = nnvm::CreateVariableNode(var_name + std::to_string(0));
+      name_map.emplace(var_name, nnvm::NodeEntry{n, 0, 0});
     } else {
-      ++(it->second);
+      // other use of same node as input to subgraph
+      name_count_map[var_name]++;
     }
-    nnvm::ObjectPtr n = nnvm::CreateVariableNode(
-        var_name + std::to_string(name_count_map[var_name]));
 
-    *e = nnvm::NodeEntry{n, 0, 0};
+    if (dedup) {
+      *e = name_map[var_name];
+    } else {
+      nnvm::ObjectPtr n = nnvm::CreateVariableNode(
+          var_name + std::to_string(name_count_map[var_name]));
+      *e = nnvm::NodeEntry{n, 0, 0};
+    }
   }
 }
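A Python model of the logic above (a sketch for intuition, not MXNet API code): inputs are keyed by the producing entry's name; the first use creates a placeholder variable, and when dedup is on, every later use of the same producer is rewired to that one shared placeholder instead of getting a fresh numbered copy.

    def cut_graph_inputs(input_names, dedup):
        name_map = {}     # name -> shared placeholder (dedup path)
        name_count = {}   # name -> count of duplicate uses (non-dedup path)
        new_inputs = []
        for name in input_names:
            if name not in name_count:      # first use of this producer
                name_count[name] = 0
                name_map[name] = name + '0'
            else:                           # duplicate use of the same producer
                name_count[name] += 1
            if dedup:
                new_inputs.append(name_map[name])
            else:
                new_inputs.append(name + str(name_count[name]))
        return new_inputs

    assert cut_graph_inputs(['a', 'a', 'b'], dedup=True)  == ['a0', 'a0', 'b0']
    assert cut_graph_inputs(['a', 'a', 'b'], dedup=False) == ['a0', 'a1', 'b0']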

@@ -593,10 +609,14 @@ void CreateSubgraphNode(nnvm::Graph* g,
 #if DEBUG_SUBGRAPH
   LOG(INFO) << "Searching for input entries...";
 #endif
-  std::vector<nnvm::NodeEntry*> input_entries;
+  bool dedup_subgraph = g->HasAttr("dedup_subgraph");
+  std::vector<nnvm::NodeEntry*> input_entries;  // nodes that produce inputs to subgraph nodes
   FindInputEntries(*g, simple_nodes, subgraph_nodes, *entry_top_order_map, &input_entries);
-  std::vector<nnvm::NodeEntry> orig_input_entries;
-  CutGraphInputs(input_entries, &orig_input_entries, false);
+  std::vector<nnvm::NodeEntry> orig_input_entries;     // original input entries (dupes)
+  std::vector<nnvm::NodeEntry> unique_orig_entries;    // unique original input entries
+  std::vector<nnvm::NodeEntry*> unique_input_entries;  // unique modified subgraph inputs
+  CutGraphInputs(input_entries, &orig_input_entries, &unique_orig_entries,
+                 &unique_input_entries, false, dedup_subgraph);
 #if DEBUG_SUBGRAPH
   PrintNodeEntries(input_entries);
   LOG(INFO) << "Searching for output entries...";
@@ -605,20 +625,42 @@
   FindOutputEntries(g, simple_nodes, subgraph_nodes, *entry_top_order_map, &output_entries);
 
   // Create a subgraph for the subgraph node
+  // entries are in topological order, with duplicates being neighbors
   nnvm::Symbol sym;
+  size_t idx = 0;
+  nnvm::NodeEntryEqual node_equal;
   sym.outputs.resize(output_entries.size());
   for (size_t i = 0; i < output_entries.size(); ++i) {
-    sym.outputs[i] = *output_entries[i];
+    if (dedup_subgraph) {
+      if (i == 0) {  // add first entry
+        sym.outputs[idx] = *output_entries[i];
+      } else if (!node_equal(sym.outputs[idx], *output_entries[i])) {  // compare to see if diff
+        // add new entries
+        idx++;
+        sym.outputs[idx] = *output_entries[i];
+      }  // else skip over dupe entries
+    } else {
+      sym.outputs[i] = *output_entries[i];
+    }
   }
+  if (dedup_subgraph)
+    sym.outputs.resize(idx+1);
 
   const SubgraphPropertyPtr& subg_prop = g->GetAttr<SubgraphPropertyPtr>("subgraph_property");
-  subg_prop->InitSubgraphInputs(&input_entries, &orig_input_entries);
+  if (dedup_subgraph)
+    subg_prop->InitSubgraphInputs(&unique_input_entries, &unique_orig_entries);
+  else
+    subg_prop->InitSubgraphInputs(&input_entries, &orig_input_entries);
   nnvm::ObjectPtr n = subg_prop->CreateSubgraphNode(sym, subgraph_selector, subgraph_id);
   // CreateSubgraphNode returns NULL if subgraph property determines that subgraph is sub-optimal
   // In that case, subgraph node is not created and graph is not modified
   if (n) {
     // Connect the external nodes to the subgraph node.
     subg_prop->ConnectSubgraphOutputs(n, &output_entries);
-    subg_prop->ConnectSubgraphInputs(n, &input_entries, &orig_input_entries);
+    if (dedup_subgraph)
+      subg_prop->ConnectSubgraphInputs(n, &unique_input_entries, &unique_orig_entries);
+    else
+      subg_prop->ConnectSubgraphInputs(n, &input_entries, &orig_input_entries);
 
     const auto& indexed_graph = g->indexed_graph();
     for (size_t i = 0; i < n->inputs.size(); ++i) {
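The output side of the dedup relies on one ordering fact: output entries arrive sorted so duplicates are neighbors, which lets a single pass keep only the unique ones. A sketch of that pass (illustrative Python mirroring the idx logic above):

    def collapse_duplicate_outputs(entries):
        outputs = []
        for e in entries:
            if not outputs or outputs[-1] != e:  # new unique entry
                outputs.append(e)
            # else: adjacent duplicate, skip it
        return outputs

    assert collapse_duplicate_outputs(['plus', 'plus', 'minus']) == ['plus', 'minus']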
src/operator/subgraph/partitioner/custom_subgraph_property.h (7 changes: 3 additions & 4 deletions)

@@ -319,9 +319,8 @@ class CustomSubgraphProperty: public SubgraphProperty {
     opt_vals_.clear();
     options_map_.clear();
     // store options in map in subgraph property to re-use later for reviewSubgraph
-    for (auto& kv : options_map) {
-      options_map_.push_back(kv);
-    }
+    options_map_.insert(options_map.begin(), options_map.end());
 
     // convert options_map_ to char* to pass to backend library
     for (auto& kv : options_map_) {
       opt_keys_.push_back(kv.first.c_str());
@@ -526,7 +525,7 @@ class CustomSubgraphProperty: public SubgraphProperty {
   mxnet::ext::opCallFree_t call_free_;
   std::unordered_map<std::string, int> supported_nodes;
   std::string subgraph_op_name;
-  std::vector<std::pair<std::string, std::string>> options_map_;
+  std::unordered_map<std::string, std::string> options_map_;
   std::vector<const char*> opt_keys_, opt_vals_;
   std::vector<std::string> in_arg_names, in_aux_names;
   NDArray **in_args_ptr;
src/operator/subgraph/subgraph_property.h (28 changes: 25 additions & 3 deletions)

@@ -257,7 +257,7 @@ class SubgraphProperty {
     kAdjust,
   };
 
-  explicit SubgraphProperty(SgPropertyType type = kCreate) : type_(type) {}
+  explicit SubgraphProperty(SgPropertyType type = kCreate) : type_(type), dedup_subgraph(false) {}
 
   /*!
    * \brief The criteria of selecting the subgraph nodes.
@@ -268,7 +268,14 @@ class SubgraphProperty {
   }
 
   virtual void PrePartition(const nnvm::Graph& g,
-                            const std::unordered_map<std::string, std::string>& options_map) {}
+                            const std::unordered_map<std::string, std::string>& options_map) {
+    if (options_map.count("dedup_subgraph") > 0 &&
+        options_map.at("dedup_subgraph").compare("True") == 0) {
+      dedup_subgraph = true;
+    } else {
+      dedup_subgraph = false;
+    }
+  }
 
   virtual void PostPartition(const nnvm::Graph& g) {}
@@ -341,8 +348,22 @@ class SubgraphProperty {
    */
   virtual void ConnectSubgraphOutputs(const nnvm::ObjectPtr subgraph_node,
                                       std::vector<nnvm::NodeEntry*>* output_entries) const {
+    // Collapse output_entries pointing to same NodeEntry
+    // Outputs are ordered, duplicates are neighbors
+    nnvm::NodeEntryEqual node_equal;
+    nnvm::NodeEntry prevNodeEntry;
+    uint32_t idx = 0;
     for (size_t i = 0; i < output_entries->size(); ++i) {
-      *output_entries->at(i) = nnvm::NodeEntry{subgraph_node, static_cast<uint32_t>(i), 0};
+      if (dedup_subgraph) {
+        // increment the output idx for each unique output of the subgraph
+        if (i != 0 && !node_equal(prevNodeEntry, *output_entries->at(i)))
+          idx++;
+        prevNodeEntry = *output_entries->at(i);  // make a copy so we can compare before modifying
+        // change output entry to point to subgraph instead of original node
+        *output_entries->at(i) = nnvm::NodeEntry{subgraph_node, idx, 0};
+      } else {
+        *output_entries->at(i) = nnvm::NodeEntry{subgraph_node, static_cast<uint32_t>(i), 0};
+      }
     }
   }
   /*!
@@ -406,6 +427,7 @@ class SubgraphProperty {
  protected:
   SgPropertyType type_;
   std::unordered_map<std::string, std::shared_ptr<nnvm::any>> attrs_;
+  bool dedup_subgraph;
 };
 
 using SubgraphPropertyPtr = std::shared_ptr<SubgraphProperty>;
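The companion half of the collapse: every external consumer of the same original entry must be pointed at the same subgraph output index. A sketch of the dedup branch of ConnectSubgraphOutputs above (illustrative Python, not MXNet API code):

    def connect_subgraph_outputs(entries):
        remapped, idx, prev = [], 0, None
        for i, e in enumerate(entries):
            if i != 0 and e != prev:
                idx += 1          # advance only when a new unique output starts
            prev = e              # remember the original entry before remapping
            remapped.append(('subgraph', idx))
        return remapped

    # two consumers of 'plus' share output 0; 'minus' becomes output 1
    assert connect_subgraph_outputs(['plus', 'plus', 'minus']) == \
           [('subgraph', 0), ('subgraph', 0), ('subgraph', 1)]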
tests/python/unittest/test_subgraph_op.py (49 changes: 46 additions & 3 deletions)

@@ -85,7 +85,19 @@ def network_structure_7():
     ret = ret1 + ret2
     return (ret, ['data'], [(1,)])
 
-def get_graphs():
+def network_structure_8():
+    # in this graph, two nodes in the subgraph consume the same input, and
+    # two nodes outside the subgraph consume a single output from the subgraph
+    data = mx.sym.Variable('data', shape=(1,))
+    sin1 = mx.sym.sin(data)
+    sin2 = mx.sym.sin(data)
+    plus = sin1 + sin2
+    ret1 = mx.sym.cos(plus)
+    ret2 = mx.sym.cos(plus)
+    ret = ret1 - ret2
+    return (ret, ['data'], [(1,)])
+
+def get_graphs():
     return [
         (network_structure_1(), ['Convolution']),
         (network_structure_2(), ['exp', 'sin', '_Plus', 'elemwise_add', '_plus']),
@@ -102,7 +114,8 @@ def get_graphs():
         (network_structure_6(), [mx.sym.sin.__name__]),
         (network_structure_6(), [mx.sym.Convolution.__name__]),
         (network_structure_6(), [mx.sym.sin.__name__, mx.sym.Convolution.__name__]),
-        (network_structure_7(), ['sin', 'elemwise_add', '_plus', '_Plus'])
+        (network_structure_7(), ['sin', 'elemwise_add', '_plus', '_Plus']),
+        (network_structure_8(), ['sin', 'elemwise_add'])
     ]
 
 def check_subgraph_exe1(sym, subgraph_backend, op_names):
@@ -157,7 +170,6 @@ def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None):
         check_call(_LIB.MXRemoveSubgraphPropertyOpNames(c_str(subgraph_backend)))
         del os.environ['MXNET_SUBGRAPH_BACKEND']
         return exe
-
     original_exec = get_executor(sym)
     partitioned_exec = get_executor(sym, subgraph_backend, op_names, original_exec)
     outputs1 = original_exec.outputs
@@ -378,6 +390,36 @@ def check_subgraph_exe9(sym, subgraph_backend, op_names):
     for i in range(len(outputs1)):
         assert_almost_equal((outputs1[i] - outputs2[i]).abs().sum().asnumpy(), np.zeros(shape=(1,)))
 
+def check_subgraph_exe10(sym, subgraph_backend, op_names):
+    """Call optimize_for to infer shapes, types and dtypes followed by graph partitioning with
+    subgraph dedup, then bind and compare results of the partitioned sym and the original sym."""
+    # bind
+    arg_shapes, _, aux_shapes = sym.infer_shape()
+    arg_names = sym.list_arguments()
+    aux_names = sym.list_auxiliary_states()
+    arg_dict = {name:mx.nd.random.uniform(shape=shape) for name,shape in zip(arg_names,arg_shapes)}
+    aux_dict = {name:mx.nd.random.uniform(shape=shape) for name,shape in zip(aux_names,aux_shapes)}
+    exe1 = sym.bind(ctx=mx.current_context(), args=arg_dict, aux_states=aux_dict, grad_req='null')
+    exe1.forward()
+
+    # infer shape/type during partitioning, before bind
+    check_call(_LIB.MXSetSubgraphPropertyOpNamesV2(c_str(subgraph_backend), mx_uint(len(op_names)),
+                                                   c_str_array(op_names)))
+    print(sym.tojson())
+    part_sym = sym.optimize_for(subgraph_backend, arg_dict, aux_dict, dedup_subgraph=True)
+    print(part_sym.tojson())
+    check_call(_LIB.MXRemoveSubgraphPropertyOpNamesV2(c_str(subgraph_backend)))
+
+    exe2 = part_sym.bind(ctx=mx.current_context(), args=arg_dict, aux_states=aux_dict, grad_req='null')
+    exe2.forward()
+
+    # compare outputs
+    outputs1 = exe1.outputs
+    outputs2 = exe2.outputs
+    assert len(outputs1) == len(outputs2)
+    for i in range(len(outputs1)):
+        assert_almost_equal((outputs1[i] - outputs2[i]).abs().sum().asnumpy(), np.zeros(shape=(1,)))
+
 def check_subgraph(subgraph_backend):
     for sym, op_names in get_graphs():
         check_subgraph_exe1(sym[0], subgraph_backend, op_names)
@@ -391,6 +433,7 @@ def check_subgraph_backend_sym(subgraph_backend):
         check_subgraph_exe6(sym[0], subgraph_backend, op_names)
         check_subgraph_exe7(sym[0], subgraph_backend, op_names)
         check_subgraph_exe8(sym[0], subgraph_backend, op_names)
+        check_subgraph_exe10(sym[0], subgraph_backend, op_names)
 
 def check_subgraph_backend_gluon(subgraph_backend):
     for sym, op_names in get_graphs():
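Taken together, network_structure_8 is the regression case for this PR: its two sin nodes share one input, and its two cos consumers share one subgraph output. A hedged sketch of exercising the new check directly, the way check_subgraph_backend_sym drives it (the 'default' backend name mirrors what these tests appear to use and should be treated as an assumption):

    sym, _, _ = network_structure_8()
    check_subgraph_exe10(sym, 'default', ['sin', 'elemwise_add'])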