From a01ec6c477a42d4c6286b2d5236b25f1a29c8e5b Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Sun, 7 Jan 2024 01:54:09 +0300
Subject: [PATCH 1/3] tools/compare: when dumping json, pretty-print it

It's essentially non-human-readable otherwise. The file size is
unlikely to matter, and if it does, the output can simply be
compressed later on.
---
 tools/compare.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/compare.py b/tools/compare.py
index 3cc9e5eb4a..7572520cc0 100755
--- a/tools/compare.py
+++ b/tools/compare.py
@@ -327,7 +327,7 @@ def main():
     # Optionally, diff and output to JSON
     if args.dump_to_json is not None:
         with open(args.dump_to_json, "w") as f_json:
-            json.dump(diff_report, f_json)
+            json.dump(diff_report, f_json, indent=1)
 
 
 class TestParser(unittest.TestCase):

From 9282814d0c4c5ec314efb96f4d0390db5fcc8294 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Sun, 7 Jan 2024 02:26:51 +0300
Subject: [PATCH 2/3] tools/compare: add failing test

---
 tools/gbench/Inputs/test5_run0.json | 18 ++++++
 tools/gbench/Inputs/test5_run1.json | 18 ++++++
 tools/gbench/report.py              | 96 +++++++++++++++++++++++++++++
 3 files changed, 132 insertions(+)
 create mode 100644 tools/gbench/Inputs/test5_run0.json
 create mode 100644 tools/gbench/Inputs/test5_run1.json

diff --git a/tools/gbench/Inputs/test5_run0.json b/tools/gbench/Inputs/test5_run0.json
new file mode 100644
index 0000000000..074103b11d
--- /dev/null
+++ b/tools/gbench/Inputs/test5_run0.json
@@ -0,0 +1,18 @@
+{
+  "context": {
+    "date": "2016-08-02 17:44:46",
+    "num_cpus": 4,
+    "mhz_per_cpu": 4228,
+    "cpu_scaling_enabled": false,
+    "library_build_type": "release"
+  },
+  "benchmarks": [
+    {
+      "name": "BM_ManyRepetitions",
+      "iterations": 1000,
+      "real_time": 1,
+      "cpu_time": 1000,
+      "time_unit": "s"
+    }
+  ]
+}
diff --git a/tools/gbench/Inputs/test5_run1.json b/tools/gbench/Inputs/test5_run1.json
new file mode 100644
index 0000000000..430df9f0da
--- /dev/null
+++ b/tools/gbench/Inputs/test5_run1.json
@@ -0,0 +1,18 @@
+{
+  "context": {
+    "date": "2016-08-02 17:44:46",
+    "num_cpus": 4,
+    "mhz_per_cpu": 4228,
+    "cpu_scaling_enabled": false,
+    "library_build_type": "release"
+  },
+  "benchmarks": [
+    {
+      "name": "BM_ManyRepetitions",
+      "iterations": 1000,
+      "real_time": 1000,
+      "cpu_time": 1,
+      "time_unit": "s"
+    }
+  ]
+}
diff --git a/tools/gbench/report.py b/tools/gbench/report.py
index 10e6b508f0..b6f7c3bf9a 100644
--- a/tools/gbench/report.py
+++ b/tools/gbench/report.py
@@ -1490,6 +1490,102 @@ def test_json_diff_report_pretty_printing(self):
             self.assertEqual(out["name"], expected)
 
 
+class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly2(
+    unittest.TestCase
+):
+    @classmethod
+    def setUpClass(cls):
+        def load_results():
+            import json
+
+            testInputs = os.path.join(
+                os.path.dirname(os.path.realpath(__file__)), "Inputs"
+            )
+            testOutput1 = os.path.join(testInputs, "test5_run0.json")
+            testOutput2 = os.path.join(testInputs, "test5_run1.json")
+            with open(testOutput1, "r") as f:
+                json1 = json.load(f)
+                json1["benchmarks"] = [
+                    json1["benchmarks"][0] for i in range(1000)
+                ]
+            with open(testOutput2, "r") as f:
+                json2 = json.load(f)
+                json2["benchmarks"] = [
+                    json2["benchmarks"][0] for i in range(1000)
+                ]
+            return json1, json2
+
+        json1, json2 = load_results()
+        cls.json_diff_report = get_difference_report(json1, json2, utest=True)
+
+    @unittest.expectedFailure
+    def test_json_diff_report_pretty_printing(self):
+        expect_line = [
+            "BM_ManyRepetitions_pvalue",
+            "0.0000",
+            "0.0000",
+            "U",
+            "Test,",
+            "Repetitions:",
+            "1000",
+            "vs",
+            "1000",
+        ]
+        output_lines_with_header = print_difference_report(
+            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
+        )
+        output_lines = output_lines_with_header[2:]
+        found = False
+        for i in range(0, len(output_lines)):
+            parts = [x for x in output_lines[i].split(" ") if x]
+            found = expect_line == parts
+            if found:
+                break
+        self.assertTrue(found)
+
+    @unittest.expectedFailure
+    def test_json_diff_report(self):
+        expected_output = [
+            {
+                "name": "BM_ManyRepetitions",
+                "label": "",
+                "time_unit": "s",
+                "run_type": "",
+                "aggregate_name": "",
+                "utest": {
+                    "have_optimal_repetitions": True,
+                    "cpu_pvalue": 0.0,
+                    "time_pvalue": 0.0,
+                    "nr_of_repetitions": 1000,
+                    "nr_of_repetitions_other": 1000,
+                },
+            },
+            {
+                "name": "OVERALL_GEOMEAN",
+                "label": "",
+                "measurements": [
+                    {
+                        "real_time": 1.0,
+                        "cpu_time": 1000.000000000069,
+                        "real_time_other": 1000.000000000069,
+                        "cpu_time_other": 1.0,
+                        "time": 999.000000000069,
+                        "cpu": -0.9990000000000001,
+                    }
+                ],
+                "time_unit": "s",
+                "run_type": "aggregate",
+                "aggregate_name": "geomean",
+                "utest": {},
+            },
+        ]
+        self.assertEqual(len(self.json_diff_report), len(expected_output))
+        for out, expected in zip(self.json_diff_report, expected_output):
+            self.assertEqual(out["name"], expected["name"])
+            self.assertEqual(out["time_unit"], expected["time_unit"])
+            assert_utest(self, out, expected)
+
+
 def assert_utest(unittest_instance, lhs, rhs):
     if lhs["utest"]:
         unittest_instance.assertAlmostEqual(

From 98757f880f6a36624280f2a32f8774bccc45272e Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Sun, 7 Jan 2024 01:59:58 +0300
Subject: [PATCH 3/3] tools/compare: don't actually discard valid (but zero) `pvalue`

So, this is embarrassing. For a very large number of repetitions,
we can end up with a p-value of exactly zero; since 0.0 is falsy,
the old check treated a perfectly valid result as if we had failed
to compute the p-value at all...
---
 tools/gbench/report.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/gbench/report.py b/tools/gbench/report.py
index b6f7c3bf9a..7158fd1654 100644
--- a/tools/gbench/report.py
+++ b/tools/gbench/report.py
@@ -315,7 +315,7 @@ def get_difference_report(json1, json2, utest=False):
             have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                 timings_cpu, timings_time
             )
-            if cpu_pvalue and time_pvalue:
+            if cpu_pvalue is not None and time_pvalue is not None:
                 utest_results = {
                     "have_optimal_repetitions": have_optimal_repetitions,
                     "cpu_pvalue": cpu_pvalue,
@@ -1518,7 +1518,6 @@ def load_results():
         json1, json2 = load_results()
         cls.json_diff_report = get_difference_report(json1, json2, utest=True)
 
-    @unittest.expectedFailure
    def test_json_diff_report_pretty_printing(self):
         expect_line = [
             "BM_ManyRepetitions_pvalue",
@@ -1543,7 +1542,6 @@ def test_json_diff_report_pretty_printing(self):
             break
         self.assertTrue(found)
 
-    @unittest.expectedFailure
     def test_json_diff_report(self):
         expected_output = [
             {