Move initializers from subgraph to the main graph to reduce memory (#12310)

* move initializers from gpt2 subgraph to the beamsearch main graph
tianleiwu authored Jul 26, 2022
1 parent f3dcbf5 commit 51a7998
Showing 1 changed file with 50 additions and 4 deletions.
54 changes: 50 additions & 4 deletions onnxruntime/python/tools/transformers/convert_generation.py
@@ -640,7 +640,7 @@ def remove_shared_initializers(
shared_prefix: str = "shared_",
min_elements: int = 1024,
):
"""Remove intializers with same value from two graphs.
"""Remove initializers with same value from two graphs.
Args:
graph1 (GraphProto): the first graph to process
@@ -656,11 +656,11 @@ def remove_shared_initializers(
shared_initializers_names = []

for initializer1 in graph1.initializer:
if not (initializer1.dims and sum(initializer1.dims) > min_elements):
if not (initializer1.dims and sum(initializer1.dims) >= min_elements):
continue

for initializer2 in graph2.initializer:
if not (initializer2.dims and sum(initializer2.dims) > min_elements):
if not (initializer2.dims and sum(initializer2.dims) >= min_elements):
continue

if OnnxModel.has_same_value(initializer1, initializer2):
@@ -747,6 +747,37 @@ def get_shared_initializers(encoder_model: ModelProto, decoder_model: ModelProto
return initializers
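
A note on the size filter used above and reused in the new move_initializers helper below: the check compares the sum of a tensor's dimensions (a cheap proxy for its size) against min_elements, and this commit relaxes the comparison from > to >=, so a 512x512 weight whose dimensions sum to exactly 1024 now qualifies under the default threshold. A small illustration:

dims = [512, 512]                # e.g. a 512x512 attention weight
assert sum(dims) >= 1024         # qualifies; the old strict ">" would have rejected it

dims = [1000]                    # e.g. a 1000-element bias vector
assert not (sum(dims) >= 1024)   # too small, left where it is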


def move_initializers(
graph: GraphProto,
min_elements: int = 1024,
) -> List[TensorProto]:
"""Remove initializers of a graph, when they have number of elements larger than a threshold.
Args:
graph (GraphProto): the graph.
min_elements (int, optional): minimal number of elements for initializers to be considered. Defaults to 1024.
Returns:
List[TensorProto]: initializers that are removed from the graph.
"""
moved_initializers = []
for tensor in graph.initializer:
if not (tensor.dims and sum(tensor.dims) >= min_elements):
continue
moved_initializers.append(tensor)

for initializer in moved_initializers:
graph.initializer.remove(initializer)

# Add type info, otherwise ORT will raise error: "input arg (*) does not have type information set by parent node."
for initializer in moved_initializers:
shape = onnx.numpy_helper.to_array(initializer).shape
value_info = onnx.helper.make_tensor_value_info(initializer.name, initializer.data_type, shape)
graph.value_info.append(value_info)

return moved_initializers
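
A minimal, standalone usage sketch of the new helper (assuming move_initializers from this file is in scope; the toy graph below is illustrative, not from the repository): build a graph holding one large initializer, move it out, and check that only value_info for the tensor's name is left behind for the parent graph.

import numpy as np
from onnx import TensorProto, helper, numpy_helper

weight = numpy_helper.from_array(np.ones((64, 64), dtype=np.float32), name="W")
matmul = helper.make_node("MatMul", ["X", "W"], ["Y"])
graph = helper.make_graph(
    [matmul],
    "toy",
    [helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 64])],
    [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 64])],
    initializer=[weight],
)

moved = move_initializers(graph, min_elements=64)  # sum of dims is 128, so "W" qualifies
assert [t.name for t in moved] == ["W"]
assert len(graph.initializer) == 0                 # the graph no longer owns the tensor
assert graph.value_info[0].name == "W"             # type info kept so ORT can resolve it later

The caller is then expected to attach the returned tensors as initializers of the outer (main) graph, where the subgraph finds them by name as outer-scope values at runtime.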


def convert_generation_model(args: argparse.Namespace, generation_type: GenerationType = GenerationType.BEAMSEARCH):
"""Convert model according to command line arguments.
@@ -904,8 +935,19 @@ def convert_generation_model(args: argparse.Namespace, generation_type: Generati
verify_t5_encoder_decoder_init_subgraph(encoder_model.graph, args.precision)

if not args.disable_shared_initializers:
# Unique shared initializers from the decoder and decoder_init could reduce memory usage in inference.
initializers = get_shared_initializers(encoder_model, decoder_model)
logger.info(f"{len(initializers)} shared initializers in subgraphs are moved to the main graph")
logger.info(
f"{len(initializers)} shared initializers ({[i.name for i in initializers]}) in subgraphs are moved to the main graph"
)

# TODO(tianleiwu): investigate the following which causes error in inference
# Move initializer from subgraph to main graph could reduce memory usage in inference.
# moved_initializers = move_initializers(encoder_model.graph)
# logger.info(
# f"{len(moved_initializers)} initializers ({[i.name for i in moved_initializers]}) from the encoder are moved to the main graph"
# )
# initializers.extend(moved_initializers)

node.attribute.extend(
[
@@ -918,6 +960,10 @@ def convert_generation_model(args: argparse.Namespace, generation_type: Generati
]
)
else:
# Move initializer from subgraph to main graph could reduce memory usage in inference.
initializers = move_initializers(decoder_model.graph)
logger.info(f"{len(initializers)} initializers from the decoder are moved to the main graph")

node.attribute.append(onnx.helper.make_attribute("decoder", decoder_model.graph))

# graph inputs

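
For context on why relocating initializers works at all: ONNX subgraphs may reference names defined in an enclosing graph (outer-scope values), which is why move_initializers leaves value_info behind. The standalone sketch below illustrates that mechanism with a plain If node standing in for the BeamSearch contrib op; the branch subgraphs use a weight that exists only as an initializer of the main graph.

import numpy as np
import onnx
from onnx import TensorProto, helper, numpy_helper

# "W" lives only on the main graph, just like the initializers moved by this commit.
weight = numpy_helper.from_array(np.full((4,), 2.0, dtype=np.float32), name="W")

then_graph = helper.make_graph(
    [helper.make_node("Identity", ["W"], ["then_out"])],
    "then_branch",
    [],
    [helper.make_tensor_value_info("then_out", TensorProto.FLOAT, [4])],
)
else_graph = helper.make_graph(
    [helper.make_node("Neg", ["W"], ["else_out"])],
    "else_branch",
    [],
    [helper.make_tensor_value_info("else_out", TensorProto.FLOAT, [4])],
)

if_node = helper.make_node("If", ["cond"], ["Y"], then_branch=then_graph, else_branch=else_graph)
main_graph = helper.make_graph(
    [if_node],
    "main",
    [helper.make_tensor_value_info("cond", TensorProto.BOOL, [])],
    [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4])],
    initializer=[weight],
)
onnx.checker.check_model(helper.make_model(main_graph))  # subgraphs resolve "W" from the enclosing scope

BeamSearch subgraphs resolve names the same way. The commit applies move_initializers to the GPT-2 decoder subgraph (the else branch above), while the commented-out TODO indicates that doing the same for the encoder subgraph still triggered an inference error at the time.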