Commit

fixes
pengwa committed Feb 1, 2023
1 parent d06ad94 commit 6b77b39
Showing 76 changed files with 53 additions and 183 deletions.
1 change: 1 addition & 0 deletions .github/workflows/lint.yml
@@ -52,6 +52,7 @@ jobs:
         level: warning
         filter_mode: added
         lib: true
+        pyright_version: 1.1.291
     - name: pylint
       uses: dciborow/[email protected]
       with:
1 change: 0 additions & 1 deletion onnxruntime/python/onnxruntime_inference_collection.py
@@ -105,7 +105,6 @@ class Session:
     """

     def __init__(self):
-
         # self._sess is managed by the derived class and relies on bindings from C.InferenceSession
         self._sess = None
         self._enable_fallback = True
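Most of the deletions in this commit repeat the pattern in the hunk above: a stray blank line sitting directly under a def, for, with, or if header is removed, a layout that formatters such as black normalize away. A minimal sketch of the before/after, using hypothetical code rather than repository code:

    # Before: blank line directly under the signature; formatters delete it
    def load_text(path):

        return open(path).read()

    # After: the body starts immediately under the signature
    def load_text(path):
        return open(path).read()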
2 changes: 0 additions & 2 deletions onnxruntime/python/tools/quantization/calibrate.py
@@ -633,7 +633,6 @@ def collect_value(self, name_to_arr):
         )

     def merge_histogram(self, old_histogram, data_arr, new_min, new_max, new_threshold):
-
         (old_hist, old_hist_edges, old_min, old_max, old_threshold) = old_histogram

         if new_threshold <= old_threshold:
@@ -849,7 +848,6 @@ def create_calibrator(
     use_external_data_format=False,
     extra_options={},
 ):
-
     calibrator = None
     if calibrate_method == CalibrationMethod.MinMax:
         # default settings for min-max algorithm
7 changes: 5 additions & 2 deletions onnxruntime/python/tools/quantization/onnx_quantizer.py
@@ -49,7 +49,6 @@ def __init__(
         op_types_to_quantize,
         extra_options=None,
     ):
-
         if not model_has_infer_metadata(model):
             model = save_and_reload_model(model)
         self.value_infos = {vi.name: vi for vi in model.graph.value_info}
@@ -808,7 +807,11 @@ def __quantize_inputs(
             initializer = find_by_name(node_input, self.model.initializer())
             if initializer is not None:
                 if self.per_channel and op_level_per_channel:
-                    (q_weight_name, zp_name, scale_name,) = self.quantize_weight_per_channel(
+                    (
+                        q_weight_name,
+                        zp_name,
+                        scale_name,
+                    ) = self.quantize_weight_per_channel(
                         initializer.name,
                         self.weight_qType if initializer_use_weight_qType else self.activation_qType,
                         axis,
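The second hunk here is a mechanical reformat that recurs below (in transformers/benchmark.py and the GPT-2 parity test): the assignment target keeps a trailing comma inside its parentheses, which black's magic trailing comma treats as a request to explode the target one name per line. Both forms unpack identically; a runnable sketch with hypothetical names (quantize_weight is a stand-in, not the repository's API):

    def quantize_weight(name, per_channel=True):
        # stand-in for the repository's quantizer API
        return name + "_quantized", name + "_zero_point", name + "_scale"

    # trailing comma inside the parens makes black split the target
    (q_name, zp_name, scale_name,) = quantize_weight("w", per_channel=True)

    # the exploded form black produces: same unpacking, one name per line
    (
        q_name,
        zp_name,
        scale_name,
    ) = quantize_weight("w", per_channel=True)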
16 changes: 0 additions & 16 deletions onnxruntime/python/tools/tensorrt/perf/benchmark.py
@@ -233,7 +233,6 @@ def get_latency_result(runtimes, batch_size):


 def get_ort_session_inputs_and_outputs(name, session, ort_input):
-
     sess_inputs = {}
     sess_outputs = None

@@ -428,7 +427,6 @@ def inference_ort(


 def inference_ort_and_get_prediction(name, session, ort_inputs):
-
     ort_outputs = []
     for ort_input in ort_inputs:
         sess_inputs, sess_outputs = get_ort_session_inputs_and_outputs(name, session, ort_input)
@@ -548,7 +546,6 @@ def generate_onnx_model_random_input(test_times, ref_input):
     inputs = []

     for i in range(test_times):
-
         input_data = []
         for tensor in ref_input:
             shape = tensor.shape
@@ -743,7 +740,6 @@ def update_metrics_map_ori(model_to_metrics, name, ep_to_operator):
 #
 ###################################################################################################
 def update_fail_model_map(model_to_fail_ep, model_name, ep, e_type, e):
-
     if model_name in model_to_fail_ep and ep in model_to_fail_ep[model_name]:
         return

@@ -766,7 +762,6 @@ def update_fail_model_map(model_to_fail_ep, model_name, ep, e_type, e):


 def update_fail_model_map_ori(model_to_fail_ep, fail_results, model_name, ep, e_type, e):
-
     if model_name in model_to_fail_ep and ep in model_to_fail_ep[model_name]:
         return

@@ -785,7 +780,6 @@ def update_fail_model_map_ori(model_to_fail_ep, fail_results, model_name, ep, e_


 def skip_ep(model_name, ep, model_to_fail_ep):
-
     if model_name not in model_to_fail_ep:
         return False

@@ -969,7 +963,6 @@ def find_test_data_directory(path):


 def parse_models_info_from_directory(path, models):
-
     test_data_dir = find_test_data_directory(path)

     if test_data_dir:
@@ -996,15 +989,13 @@ def parse_models_info_from_directory(path, models):


 def parse_models_info_from_file(root_dir, path, models):
-
     # default working directory
     root_working_directory = root_dir + "perf/"

     with open(path) as f:
         data = json.load(f)

         for row in data:
-
             if "root_working_directory" in row:
                 root_working_directory = row["root_working_directory"]
                 continue
@@ -1185,7 +1176,6 @@ def output_details(results, csv_filename):


 def output_fail(model_to_fail_ep, csv_filename):
-
     with open(csv_filename, mode="w", newline="") as csv_file:
         column_names = ["model", "ep", "error type", "error message"]

@@ -1220,7 +1210,6 @@ def add_status_dict(status_dict, model_name, ep, status):


 def build_status(status_dict, results, is_fail):
-
     if is_fail:
         for model, model_info in results.items():
             for ep, ep_info in model_info.items():
@@ -1240,7 +1229,6 @@ def build_status(status_dict, results, is_fail):


 def output_status(results, csv_filename):
-
     need_write_header = True
     if os.path.exists(csv_filename):
         need_write_header = False
@@ -1533,7 +1521,6 @@ def output_metrics(model_to_metrics, csv_filename):

     results = []
     for model, ep_info in model_to_metrics.items():
-
         result = {}
         result_fp16 = {}
         result["model_name"] = model
@@ -1663,7 +1650,6 @@ def test_models_eps(args, models):
         ep_results = {"latency": {}, "metrics": {}, "session": {}}

         for exec_provider in ep_list:
-
             # Skip model + EP combinations that have already failed in a previous run.
             if skip_ep(name, exec_provider, model_to_fail_ep):
                 continue
@@ -1752,7 +1738,6 @@ def run_model_on_ep(

     # use float16.py for cuda fp16 only
     if cuda_fp16 == exec_provider:
-
         # handle model
         if "model_path_fp16" in model_info:
             model_path = os.path.normpath(os.path.join(model_work_dir, model_info["model_path_fp16"]))
@@ -1942,7 +1927,6 @@ def benchmark_model_on_ep(
         return

     if result:
-
         ep_results["latency"][exec_provider] = {}
         ep_results["latency"][exec_provider]["average_latency_ms"] = result["average_latency_ms"]
         ep_results["latency"][exec_provider]["latency_90_percentile"] = result["latency_90_percentile"]
1 change: 0 additions & 1 deletion onnxruntime/python/tools/tensorrt/perf/perf_utils.py
@@ -119,7 +119,6 @@ def pretty_print(pp, json_object):


 def parse_single_file(f):
-
     try:
         data = json.load(f)
     except Exception as e:
7 changes: 6 additions & 1 deletion onnxruntime/python/tools/transformers/benchmark.py
@@ -178,7 +178,12 @@ def run_onnxruntime(
                 fusion_options,
             )
         if "tf" in model_source:
-            (onnx_model_file, is_valid_onnx_model, vocab_size, max_sequence_length,) = export_onnx_model_from_tf(
+            (
+                onnx_model_file,
+                is_valid_onnx_model,
+                vocab_size,
+                max_sequence_length,
+            ) = export_onnx_model_from_tf(
                 model_name,
                 MODELS[model_name][1],
                 MODELS[model_name][2],
2 changes: 1 addition & 1 deletion onnxruntime/python/tools/transformers/bert_perf_test.py
@@ -517,7 +517,7 @@ def main():
     with open(summary_file, "w+", newline="") as tsv_file:
         tsv_writer = csv.writer(tsv_file, delimiter="\t", lineterminator="\n")
         headers = None
-        for (key, perf_result) in sorted_results:
+        for key, perf_result in sorted_results:
             params = key.split(",")
             if headers is None:
                 headers = [
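This one-line change, repeated in the T5 verify_onnx hunk further down, drops the redundant parentheses around the for loop's unpacking target; the two spellings are equivalent. A runnable sketch:

    pairs = [("a", 1), ("b", 2)]

    # parenthesized target: valid, but the parentheses add nothing
    for (key, value) in pairs:
        print(key, value)

    # the form this commit standardizes on: same unpacking
    for key, value in pairs:
        print(key, value)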
@@ -90,7 +90,6 @@ def run_test(
     segment_ids_name,
     input_mask_name,
 ):
-
     # Try deduce input names from optimized model.
     input_ids, segment_ids, input_mask = get_bert_inputs(
         optimized_model, input_ids_name, segment_ids_name, input_mask_name
@@ -1165,7 +1165,6 @@ def generate_gpt2_init_decoder(

     # Try without the Casts before and after the MatMuls
     if logits_matmul_to_residual_add_path is None:
-
         # Normalization Node is : LayerNormalization
         logits_matmul_to_residual_add_path = gpt2_init_decoder_model.match_parent_path(
             logits_matmul_node,
7 changes: 6 additions & 1 deletion onnxruntime/python/tools/transformers/fusion_embedlayer.py
@@ -112,7 +112,12 @@ def check_attention_subgraph(
             logger.debug("No Attention like subgraph in children of LayerNormalization")
             return False
         else:
-            if children_types != ["Add", "MatMul", "MatMul", "MatMul",] and children_types != [
+            if children_types != [
+                "Add",
+                "MatMul",
+                "MatMul",
+                "MatMul",
+            ] and children_types != [
                 "MatMul",
                 "MatMul",
                 "MatMul",
2 changes: 0 additions & 2 deletions onnxruntime/python/tools/transformers/models/bart/export.py
@@ -32,7 +32,6 @@ def print_args(args):


 def user_command():
-
     parent_parser = argparse.ArgumentParser(add_help=False)
     parent_parser.add_argument("--max_length", type=int, default=20, help="default to 20")
     parent_parser.add_argument("--min_length", type=int, default=0, help="default to 0")
@@ -66,7 +65,6 @@ def user_command():


 if __name__ == "__main__":
-
     args = user_command()
     if args.opset_version < 14:
         raise ValueError(f"The minimum supported opset version is 14! The given one was {args.opset_version}.")
@@ -89,7 +89,6 @@ def _create_encoder_export(args, config: BartConfig):
     def _prepare_encoder_decoder_kwargs_for_generation(
         self, input_ids: torch.Tensor, model_kwargs, model_input_name: Optional[str] = None
     ) -> Dict[str, Any]:
-
         # retrieve encoder hidden states
         # 1. get encoder
         encoder = self.get_encoder()
@@ -189,7 +188,6 @@ def export_encoder(args):
     config, tokenizer = export_helper.initialize_config(args)

     with torch.no_grad():
-
         model, input_data = export_helper.initialize_model(config, tokenizer, args)
         start_time = time.time()
         model._prepare_encoder_decoder_kwargs_for_generation = _create_encoder_export(args, config).__get__(
@@ -249,7 +249,6 @@ def export_decoder(args):
     config = decoder_config_update(config)

     with torch.no_grad():
-
         model, input_data = export_helper.initialize_model(config, tokenizer, args)
         start_time = time.time()

@@ -32,7 +32,6 @@ def run_inference(args):
     config, tokenizer = export_helper.initialize_config(args)

     with torch.no_grad():
-
         model, input_data = export_helper.initialize_model(config, tokenizer, args)
         batch_num = 3
         input_data = input_data.repeat(batch_num, 1)
@@ -130,7 +130,6 @@ def __init__(
         top_k=20,
         top_k_required_order=False,
     ):
-
         self.batch_size = input_ids.shape[0]
         self.input_length = input_ids.shape[1]
         self.n_layer = num_layer
@@ -462,7 +461,10 @@ def test_generation(
         )
         Gpt2Helper.auto_increase_buffer_size(output_buffers, output_shapes)

-        (onnx_io_output, avg_latency_ms,) = Gpt2Helper.onnxruntime_inference_with_binded_io(
+        (
+            onnx_io_output,
+            avg_latency_ms,
+        ) = Gpt2Helper.onnxruntime_inference_with_binded_io(
             session,
             onnx_io_runner.get_inputs(),
             output_buffers,
@@ -149,7 +149,6 @@ def parse_arguments():

 # Create a dummy input for ONNX export.
 def get_dummy_inputs(config, export_padding, device):
-
     # When sequence length is multiple of windows size, there is no padding logic in ONNX graph
     sequence_length = config.attention_window[0] + 1 if export_padding else config.attention_window[0]

@@ -264,7 +264,6 @@ def create_longformer_test_data(
     global_mask_name,
     num_global_tokens,
 ):
-
     input_ids, input_mask, global_mask = get_longformer_inputs(model, input_ids_name, input_mask_name, global_mask_name)
     all_inputs = generate_test_data(
         batch_size,
@@ -93,7 +93,6 @@ def __init__(self, decoder, lm_head, config):
         self.config = config

     def forward(self, decoder_input_ids, encoder_attention_mask, encoder_hidden_states, *past):
-
         past_key_values = PastKeyValuesHelper.group_by_layer(past, self.config.num_layers)

         decoder_outputs = self.decoder(
@@ -255,7 +255,7 @@ def verify_onnx(

         test_cases = [(4, 11), (1, 2), (3, 1), (8, 5)]
         test_cases_max_diff = []
-        for (batch_size, encode_sequence_length) in test_cases[:max_cases]:
+        for batch_size, encode_sequence_length in test_cases[:max_cases]:
             inputs = T5EncoderDecoderInitInputs.create_dummy(
                 model.config,
                 batch_size,
1 change: 0 additions & 1 deletion onnxruntime/python/tools/transformers/onnx_exporter.py
@@ -461,7 +461,6 @@ def export_onnx_model_from_pt(
     model_fusion_statistics,
     fusion_options,
 ):
-
     config, model = load_pt_model(model_name, model_class, cache_dir, config_modifier)
     # config, model = load_pt_model_from_tf(model_name)
     model.cpu()
1 change: 0 additions & 1 deletion onnxruntime/python/tools/transformers/onnx_model_bart.py
@@ -159,7 +159,6 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node):
             return

         if matmul_v.input[0] == root_input and matmul_q.input[0] == root_input and matmul_v.input[0] == root_input:
-
             mask_nodes = []
             mask_index = None
             attention_last_node = reshape_qkv_2
1 change: 0 additions & 1 deletion onnxruntime/python/tools/transformers/onnx_model_tnlr.py
@@ -40,7 +40,6 @@ def create_attention_node(
         output: str,
         add_qk_str: str,
     ) -> Union[NodeProto, None]:
-
         assert num_heads > 0
         if hidden_size > 0 and (hidden_size % num_heads) != 0:
             logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}")
1 change: 0 additions & 1 deletion onnxruntime/python/tools/transformers/shape_optimizer.py
@@ -107,7 +107,6 @@ def add_extra_graph_output(self, extra_outputs):
         names_to_evaluate = []
         output_names = [output.name for output in self.model.graph.output]
         for name in extra_outputs:
-
             if self.get_initializer(name) is not None:  # already a constant
                 continue
             names_to_evaluate.append(name)