diff --git a/Cargo.lock b/Cargo.lock index 880ba684..8d30f054 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -198,35 +198,6 @@ dependencies = [ "futures-core", ] -[[package]] -name = "async-executor" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c1da3ae8dabd9c00f453a329dfe1fb28da3c0a72e2478cdcd93171740c20499" -dependencies = [ - "async-lock", - "async-task", - "concurrent-queue", - "fastrand 2.0.1", - "futures-lite", - "slab", -] - -[[package]] -name = "async-global-executor" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1b6f5d7df27bd294849f8eec66ecfc63d11814df7a4f5d74168a2394467b776" -dependencies = [ - "async-channel", - "async-executor", - "async-io", - "async-lock", - "blocking", - "futures-lite", - "once_cell", -] - [[package]] name = "async-io" version = "1.13.0" @@ -293,53 +264,15 @@ dependencies = [ [[package]] name = "async-ssh2-lite" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89f6414df0399afb863951edded534013e4c74cc4d7ab3b4a92b608986027ec7" -dependencies = [ - "async-io", - "futures-util", - "ssh2", -] - -[[package]] -name = "async-std" -version = "1.12.0" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62565bb4402e926b29953c785397c6dc0391b7b446e45008b0049eb43cec6f5d" +checksum = "6cb43eaa75050ebe27dfd16e6de7078d9796a251f03c77d7a24c05aa9037c29b" dependencies = [ - "async-channel", - "async-global-executor", - "async-io", - "async-lock", - "crossbeam-utils", - "futures-channel", - "futures-core", - "futures-io", - "futures-lite", - "gloo-timers", - "kv-log-macro", - "log", - "memchr", - "once_cell", - "pin-project-lite", - "pin-utils", - "slab", - "wasm-bindgen-futures", -] - -[[package]] -name = "async-std-resolver" -version = "0.20.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf3e776afdf3a2477ef4854b85ba0dff3bd85792f685fb3c68948b4d304e4f0" -dependencies = [ - "async-std", "async-trait", - "futures-io", "futures-util", - "pin-utils", - "trust-dns-resolver", + "libssh2-sys", + "ssh2", + "tokio", ] [[package]] @@ -980,12 +913,6 @@ dependencies = [ "ordered-float", ] -[[package]] -name = "data-encoding" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" - [[package]] name = "dialoguer" version = "0.11.0" @@ -1021,26 +948,6 @@ dependencies = [ "crypto-common", ] -[[package]] -name = "dirs" -version = "3.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - [[package]] name = "dotenv" version = "0.15.0" @@ -1089,18 +996,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "enum-as-inner" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "570d109b813e904becc80d8d5da38376818a143348413f7149f1340fe04754d4" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "env_logger" version = "0.10.0" @@ -1421,18 +1316,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" -[[package]] -name = "gloo-timers" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b995a66bb87bebce9a0f4a95aed01daca4872c050bfcb21653361c03bc35e5c" -dependencies = [ - "futures-channel", - "futures-core", - "js-sys", - "wasm-bindgen", -] - [[package]] name = "gpucachesim" version = "0.1.0" @@ -1587,17 +1470,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "hostname" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" -dependencies = [ - "libc", - "match_cfg", - "winapi", -] - [[package]] name = "html-escape" version = "0.2.13" @@ -1727,17 +1599,6 @@ dependencies = [ "cc", ] -[[package]] -name = "idna" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" -dependencies = [ - "matches", - "unicode-bidi", - "unicode-normalization", -] - [[package]] name = "idna" version = "0.4.0" @@ -1836,18 +1697,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "ipconfig" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e2f18aece9709094573a9f24f483c4f65caa4298e2f7ae1b71cc65d853fad7" -dependencies = [ - "socket2 0.3.19", - "widestring", - "winapi", - "winreg 0.6.2", -] - [[package]] name = "ipnet" version = "2.8.0" @@ -1907,15 +1756,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "kv-log-macro" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" -dependencies = [ - "log", -] - [[package]] name = "lazy_static" version = "1.4.0" @@ -1979,12 +1819,6 @@ dependencies = [ "cc", ] -[[package]] -name = "linked-hash-map" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" - [[package]] name = "linux-raw-sys" version = "0.3.8" @@ -2012,30 +1846,6 @@ name = "log" version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" -dependencies = [ - "value-bag", -] - -[[package]] -name = "lru-cache" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" -dependencies = [ - "linked-hash-map", -] - -[[package]] -name = "match_cfg" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" - -[[package]] -name = "matches" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" [[package]] name = "matrixmultiply" @@ -2589,15 +2399,6 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" -[[package]] -name = "plain_path" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f1b940aa8e0562ece01eb12a9731bb8f6f0325c2c97c8629f852504f01d4537" -dependencies = [ - "dirs", -] - [[package]] name = "playground" version = "0.1.0" @@ -2808,12 +2609,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = "quote" version = "1.0.33" @@ -2909,17 +2704,6 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "redox_users" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" -dependencies = [ - "getrandom", - "redox_syscall 0.2.16", - "thiserror", -] - [[package]] name = "regex" version = "1.10.0" @@ -2959,10 +2743,17 @@ checksum = "c3cbb081b9784b07cceb8824c8583f86db4814d172ab043f3c23f7dc600bf83d" name = "remote" version = "0.1.0" dependencies = [ + "async-ssh2-lite", + "async-trait", "clap", "color-eyre", "dotenv", - "ssh_jumper", + "env_logger", + "futures", + "itertools 0.10.5", + "log", + "ssh2", + "strum", "thiserror", "tokio", ] @@ -3007,17 +2798,7 @@ dependencies = [ "wasm-bindgen-futures", "web-sys", "webpki-roots", - "winreg 0.50.0", -] - -[[package]] -name = "resolv-conf" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" -dependencies = [ - "hostname", - "quick-error", + "winreg", ] [[package]] @@ -3431,17 +3212,6 @@ dependencies = [ "syn 2.0.38", ] -[[package]] -name = "socket2" -version = "0.3.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "122e570113d28d773067fab24266b66753f6ea915758651696b6e35e49f88d6e" -dependencies = [ - "cfg-if", - "libc", - "winapi", -] - [[package]] name = "socket2" version = "0.4.9" @@ -3489,33 +3259,6 @@ dependencies = [ "parking_lot 0.11.2", ] -[[package]] -name = "ssh_jumper" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fce1cdbdcecdd805b9a59de5d2889d81a091cd92b4a55978a0e9107efa4186da" -dependencies = [ - "async-io", - "async-ssh2-lite", - "async-std-resolver", - "futures", - "plain_path", - "ssh_jumper_model", - "tokio", -] - -[[package]] -name = "ssh_jumper_model" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e272e5de556b6db9291f189f06ab11d3445b0d9ef6317bcece364e02aae87bbe" -dependencies = [ - "async-ssh2-lite", - "async-std-resolver", - "plain_path", - "tokio", -] - [[package]] name = "stats" version = "0.1.0" @@ -3933,49 +3676,6 @@ dependencies = [ "tracing-log", ] -[[package]] -name = "trust-dns-proto" -version = "0.20.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca94d4e9feb6a181c690c4040d7a24ef34018d8313ac5044a61d21222ae24e31" -dependencies = [ - "async-trait", - "cfg-if", - "data-encoding", - "enum-as-inner", - "futures-channel", - "futures-io", - "futures-util", - "idna 0.2.3", - "ipnet", - "lazy_static", - "log", - "rand", - "smallvec", - "thiserror", - "tinyvec", - "url", -] - -[[package]] -name = "trust-dns-resolver" -version = "0.20.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecae383baad9995efaa34ce8e57d12c3f305e545887472a492b838f4b5cfb77a" -dependencies = [ - "cfg-if", - "futures-util", - "ipconfig", - "lazy_static", - "log", - "lru-cache", - "parking_lot 0.11.2", - "resolv-conf", - "smallvec", - "thiserror", - "trust-dns-proto", -] - [[package]] name = "try-lock" version = "0.2.4" @@ -4058,7 +3758,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" dependencies = [ "form_urlencoded", - "idna 0.4.0", + "idna", "percent-encoding", ] @@ -4149,12 +3849,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" -[[package]] -name = "value-bag" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d92ccd67fb88503048c01b59152a04effd0782d035a83a6d256ce6085f08f4a3" - [[package]] name = "vcpkg" version = "0.2.15" @@ -4312,12 +4006,6 @@ dependencies = [ "rustix 0.38.18", ] -[[package]] -name = "widestring" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c168940144dd21fd8046987c16a46a33d5fc84eec29ef9dcddc2ac9e31526b7c" - [[package]] name = "winapi" version = "0.3.9" @@ -4490,15 +4178,6 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" -[[package]] -name = "winreg" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2986deb581c4fe11b621998a5e53361efe6b48a151178d0cd9eeffa4dc6acc9" -dependencies = [ - "winapi", -] - [[package]] name = "winreg" version = "0.50.0" diff --git a/WIP.md b/WIP.md index 958eee7d..12708d57 100644 --- a/WIP.md +++ b/WIP.md @@ -67,17 +67,10 @@ The Maxwell/Pascal L1 data cache had similar tag stage performance but local and - TODO: - - overall results in parallel table - - parallel remove .0 where possible - - DONE: bold font for best results in parallel table - - DONE: remove parallel execution from exec driven simulate (only baseline) - - DONE: fix parallel plot table (multiple kernel launch ids) - - - DONE: compute blocks per sm metric for parallel, maybe we just need to scale up the workloads + - validate: add remote connection to das5 and das6 - make the basic plots look good for pascal pchase and write it down - run the same for older fermi or maxwell gpu on das5 and write it down - - GIVE UP: have another go at ampere - plot: compare mem only simulation and trace reconstruction error - plot: compute overall correlations for all metrics @@ -88,6 +81,13 @@ The Maxwell/Pascal L1 data cache had similar tag stage performance but local and - check if any line ever has different hit_cluster, otherwise makes no sense - this prob wont be interesting for l1, but maybe l2? + - DONE: overall results in parallel table + - GIVE UP: have another go at ampere + - DONE: parallel remove .0 where possible + - DONE: bold font for best results in parallel table + - DONE: remove parallel execution from exec driven simulate (only baseline) + - DONE: fix parallel plot table (multiple kernel launch ids) + - DONE: compute blocks per sm metric for parallel, maybe we just need to scale up the workloads - DONE: simulator: l1 latency should only affect HITS ==> l1 return queue - GIVE UP: research: try to understand the l1 tex cache hit rate nvprof metric - DONE: connect to das6 diff --git a/gpucachesim/benchmarks.py b/gpucachesim/benchmarks.py index 6593aa91..d7c98647 100644 --- a/gpucachesim/benchmarks.py +++ b/gpucachesim/benchmarks.py @@ -303,12 +303,16 @@ def construct_playground_simulate_target_config(self, node): class Benchmarks: + path: Path config: Config - def __init__(self, path: os.PathLike) -> None: + def __init__(self, path: typing.Optional[os.PathLike]) -> None: """load the materialized benchmark config""" - - with open(path or DEFAULT_BENCH_FILE, "rb") as f: + if path is None: + self.path = DEFAULT_BENCH_FILE + else: + self.path = Path(path) + with open(self.path, "rb") as f: benchmarks = yaml.load(f, Loader=BenchmarkLoader) self.config = benchmarks["config"] diff --git a/gpucachesim/stats/__init__.py b/gpucachesim/stats/__init__.py index acbf8ba9..13ca9425 100644 --- a/gpucachesim/stats/__init__.py +++ b/gpucachesim/stats/__init__.py @@ -20,7 +20,13 @@ import gpucachesim.plot as plot import gpucachesim.utils as utils -from gpucachesim.benchmarks import SIMULATE_FUNCTIONAL_CONFIG_COLS, Target, Benchmarks, GPUConfig, REPO_ROOT_DIR +from gpucachesim.benchmarks import ( + SIMULATE_FUNCTIONAL_CONFIG_COLS, + Target, + Benchmarks, + GPUConfig, + REPO_ROOT_DIR, +) # suppress scientific notation by setting float_format @@ -42,6 +48,7 @@ def main(): # ctx.ensure_object(dict) pass + def aggregate_benchmark_results( sim_df: pd.DataFrame, bench_name: str, @@ -118,11 +125,12 @@ def aggregate_benchmark_results( (128, 128, 128), (32, 64, 128), (128, 32, 32), - (32, 1024, 32), - (32, 2048, 32), - (32, 4096, 32), + (512, 32, 512), + # (32, 1024, 32), + # (32, 2048, 32), + # (32, 4096, 32), ], - columns=["input_m", "input_n", "input_p"] + columns=["input_m", "input_n", "input_p"], ) # print(subset.index) # print(selected_df.index) @@ -158,7 +166,9 @@ def aggregate_benchmark_results( ] ) - group_cols = benchmarks.BENCH_TARGET_INDEX_COLS + ["kernel_name", "run"] + input_cols + group_cols = ( + benchmarks.BENCH_TARGET_INDEX_COLS + ["kernel_name", "run"] + input_cols + ) # print(selected_df.index) # non_numeric_cols = sorted(selected_df.select_dtypes(include=["object"]).columns.tolist()) @@ -170,13 +180,16 @@ def aggregate_benchmark_results( **benchmarks.NON_NUMERIC_COLS, } aggregations = { - col: agg for col, agg in aggregations.items() + col: agg + for col, agg in aggregations.items() if col in selected_df and col not in group_cols } pprint(aggregations) # print(sorted(selected_df.columns.tolist())) - per_kernel = selected_df.groupby(group_cols, dropna=False).agg(aggregations).reset_index() + per_kernel = ( + selected_df.groupby(group_cols, dropna=False).agg(aggregations).reset_index() + ) # print(sorted(per_kernel.columns.tolist())) # selected_df.groupby(group_cols, dropna=False)[STAT_COLS].mean().reset_index() @@ -191,7 +204,8 @@ def aggregate_benchmark_results( **benchmarks.NON_NUMERIC_COLS, } aggregations = { - col: agg for col, agg in aggregations.items() + col: agg + for col, agg in aggregations.items() if col in per_kernel and not col in group_cols } pprint(aggregations) @@ -208,35 +222,647 @@ def aggregate_benchmark_results( # stat_cols = set(averaged.columns) - set(["benchmark"]) - set(input_cols) per_target_pivoted = per_target.pivot( - index=["benchmark"] + input_cols, columns="target", # values=STAT_COLS + index=["benchmark"] + input_cols, + columns="target", # values=STAT_COLS ) # per_target = averaged.set_index(["target", "benchmark"] + input_cols) return per_kernel, per_target_pivoted - def different_cols(df): return [col for col in df.columns if len(df[col].value_counts()) > 1] +class ParallelTableRow(typing.NamedTuple): + metric: str + threads: int + serial_value: typing.Optional[typing.Tuple[float, typing.Union[float, int, str]]] + det_value: typing.Optional[typing.Tuple[float, typing.Union[float, int, str]]] + nondet_values: typing.Sequence[typing.Tuple[float, typing.Union[float, int, str]]] + + def values(self): + values = [] + if self.serial_value is not None: + values.append(self.serial_value[0]) + if self.det_value is not None: + values.append(self.det_value[0]) + values += [v[0] for v in self.nondet_values] + return values + + +def build_parallel_table_rows( + df: pd.DataFrame, + num_benchmarks, + all_benchmarks, + thousands_round_to=1, + variable_precision=True, +) -> typing.Sequence[ParallelTableRow]: + interleave_n = list(itertools.product([False, True], [5, 10])) + table_rows: typing.Sequence[ParallelTableRow] = [] + + for threads in [4, 8]: + threads_mask = df["input_threads_parallel"] == threads + det_mask = df["input_mode_parallel"] == "deterministic" + nondet_no_interleave_mask = df["input_mode_parallel"] == "nondeterministic" + nondet_interleave_mask = ( + df["input_mode_parallel"] == "nondeterministic_interleave" + ) + # print([m.sum() for m in [ + # mask, threads_mask, det_mask, nondet_no_interleave_mask, nondet_interleave_mask + # ]]) + + det = df[threads_mask & det_mask] + # print( + # det[ + # bench_input_cols + # + [ + # "input_threads_parallel", + # "exec_time_sec_parallel", + # "input_id_parallel", + # "input_id_serial", + # # "dram_reads_serial", + # # "dram_reads_parallel", + # # "dram_reads_rel_err", + # "dram_writes_serial", + # "dram_writes_parallel", + # "dram_writes_rel_err", + # ] + different_cols(det) + # ] + # ) + print("===") + nondet_no_interleave = df[threads_mask & nondet_no_interleave_mask] + nondet_interleave = df[threads_mask & nondet_interleave_mask] + + assert len(det) == num_benchmarks + assert len(nondet_no_interleave) == 2 * num_benchmarks + assert len(nondet_interleave) == 2 * num_benchmarks + # assert ( + # len( + # df[[ + # "exec_time_sec_serial", + # "cycles_serial", + # "input_id_serial", + # ]].drop_duplicates() + # ) + # == 1 + # ) + + # exec time (speedup) + serial_exec_time = df.loc[threads_mask, "exec_time_sec_serial"].values[0] + det_exec_time = det["exec_time_sec_parallel"].values[0] + det_speedup = det["exec_time_sec_speedup"].values[0] + nondet_values = [] + for interleave, n in interleave_n: + nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = nondet[nondet["input_run_ahead_parallel"] == n] + nondet_exec_time = nondet["exec_time_sec_parallel"].values[0] + nondet_speedup = nondet["exec_time_sec_speedup"].values[0] + if all_benchmarks: + nondet_values.append( + ( + nondet_speedup, + "${}x$".format( + plot.round_to_precision( + nondet_speedup, + round_to=1, + variable_precision=variable_precision, + ) + ), + ) + ) + + else: + nondet_values.append( + ( + nondet_exec_time, + "${:>3.1f}s~({}x)$".format( + nondet_exec_time, + plot.round_to_precision( + nondet_speedup, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if all_benchmarks + else (serial_exec_time, "${:>3.1f}s$".format(serial_exec_time)) + ) + if all_benchmarks: + det_value = ( + det_speedup, + "${}x$".format( + plot.round_to_precision( + det_speedup, round_to=1, variable_precision=variable_precision + ) + ), + ) + else: + det_value = ( + det_exec_time, + "${:>3.1f}s~({}x)$".format( + det_exec_time, + plot.round_to_precision( + det_speedup, round_to=1, variable_precision=variable_precision + ), + ), + ) + table_rows.append( + ParallelTableRow( + metric=r"exec\\time", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + + # cycles (rel err) + serial_cycles = int(df.loc[threads_mask, "cycles_serial"].values[0]) + det_cycles = int(det["cycles_parallel"].values[0]) + det_rel_err = det["cycles_rel_err"].values[0] + nondet_values = [] + for interleave, n in interleave_n: + nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = nondet[nondet["input_run_ahead_parallel"] == n] + + nondet_cycles = int(nondet["cycles_parallel"].values[0]) + nondet_rel_err = nondet["cycles_rel_err"].values[0] + if all_benchmarks: + nondet_values.append( + ( + nondet_rel_err, + "${}\\%$".format( + plot.round_to_precision( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ) + ), + ) + ) + else: + nondet_values.append( + ( + nondet_cycles, + "${} ({}\\%)$".format( + plot.human_format_thousands( + nondet_cycles, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if all_benchmarks + else ( + serial_cycles, + "${}$".format( + plot.human_format_thousands( + serial_cycles, + round_to=thousands_round_to, + variable_precision=variable_precision, + ) + ), + ) + ) + if all_benchmarks: + det_value = ( + 100.0 * det_rel_err, + "${}\\%$".format( + plot.round_to_precision( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ) + ), + ) + else: + det_value = ( + det_cycles, + "${} ({}\\%)$".format( + plot.human_format_thousands( + det_cycles, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + table_rows.append( + ParallelTableRow( + metric="cycles", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + + # l1 data hit rate (rel err) + serial_l1_hit_rate = df.loc[threads_mask, "l1_hit_rate_serial"].values[0] + det_l1_hit_rate = det["l1_hit_rate_parallel"].values[0] + det_rel_err = det["l1_hit_rate_rel_err"].values[0] + nondet_values = [] + for interleave, n in interleave_n: + nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = nondet[nondet["input_run_ahead_parallel"] == n] + + nondet_l1_hit_rate = nondet["l1_hit_rate_parallel"].values[0] + nondet_rel_err = nondet["l1_hit_rate_rel_err"].values[0] + if all_benchmarks: + nondet_values.append( + ( + 100.0 * nondet_rel_err, + "{}\\%$".format( + plot.round_to_precision( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + else: + nondet_values.append( + ( + 100.0 * nondet_l1_hit_rate, + "${}\\%~({}\\%)$".format( + plot.round_to_precision( + 100.0 * nondet_l1_hit_rate, + round_to=1, + variable_precision=variable_precision, + ), + plot.round_to_precision( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if all_benchmarks + else ( + 100.0 * serial_l1_hit_rate, + "${:>2.1f}\\%$".format(100.0 * serial_l1_hit_rate), + ) + ) + if all_benchmarks: + det_value = ( + 100.0 * det_rel_err, + "${}\\%$".format( + plot.round_to_precision( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + else: + det_value = ( + 100.0 * det_l1_hit_rate, + "${}\\%~({}\\%)$".format( + plot.round_to_precision( + 100.0 * det_l1_hit_rate, + round_to=1, + variable_precision=variable_precision, + ), + plot.round_to_precision( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + + table_rows.append( + ParallelTableRow( + metric=r"L1D\\hit rate", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + + # l2 data hit rate (rel err) + serial_l2_hit_rate = df.loc[threads_mask, "l2_hit_rate_serial"].values[0] + det_l2_hit_rate = det["l2_hit_rate_parallel"].values[0] + det_rel_err = det["l2_hit_rate_rel_err"].values[0] + nondet_values = [] + for interleave, n in interleave_n: + nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = nondet[nondet["input_run_ahead_parallel"] == n] + + nondet_l2_hit_rate = nondet["l2_hit_rate_parallel"].values[0] + nondet_rel_err = nondet["l2_hit_rate_rel_err"].values[0] + if all_benchmarks: + nondet_values.append( + ( + 100.0 * nondet_rel_err, + "${}\\%$".format( + plot.round_to_precision( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + else: + nondet_values.append( + ( + 100.0 * nondet_l2_hit_rate, + "${}\\%~({}\\%)$".format( + plot.round_to_precision( + 100.0 * nondet_l2_hit_rate, + round_to=1, + variable_precision=variable_precision, + ), + plot.round_to_precision( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if all_benchmarks + else ( + 100.0 * serial_l2_hit_rate, + "${}\\%$".format( + plot.round_to_precision( + 100.0 * serial_l2_hit_rate, + round_to=1, + variable_precision=variable_precision, + ) + ), + ) + ) + if all_benchmarks: + det_value = ( + 100.0 * det_rel_err, + "${}\\%$".format( + plot.round_to_precision( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + else: + det_value = ( + 100.0 * det_l2_hit_rate, + "${}\\%~({}\\%)$".format( + plot.round_to_precision( + 100.0 * det_l2_hit_rate, + round_to=1, + variable_precision=variable_precision, + ), + plot.round_to_precision( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + table_rows.append( + ParallelTableRow( + metric=r"L2D\\hit rate", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + + # dram reads (rel err) + serial_dram_reads = int(df.loc[threads_mask, "dram_reads_serial"].values[0]) + det_dram_reads = int(det["dram_reads_parallel"].values[0]) + det_rel_err = det["dram_reads_rel_err"].values[0] + nondet_values = [] + for interleave, n in interleave_n: + nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = nondet[nondet["input_run_ahead_parallel"] == n] + + nondet_dram_reads = int(nondet["dram_reads_parallel"].values[0]) + nondet_rel_err = nondet["dram_reads_rel_err"].values[0] + if all_benchmarks: + nondet_values.append( + ( + nondet_rel_err, + "${}\\%$".format( + plot.round_to_precision( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + else: + nondet_values.append( + ( + nondet_dram_reads, + "${} ({}\\%)$".format( + plot.human_format_thousands( + nondet_dram_reads, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if all_benchmarks + else ( + serial_dram_reads, + "${}$".format( + plot.human_format_thousands( + serial_dram_reads, + round_to=thousands_round_to, + variable_precision=variable_precision, + ) + ), + ) + ) + if all_benchmarks: + det_value = ( + 100.0 * det_rel_err, + "${}\\%$".format( + plot.round_to_precision( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + else: + det_value = ( + det_dram_reads, + "${} ({}\\%)$".format( + plot.human_format_thousands( + det_dram_reads, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + + table_rows.append( + ParallelTableRow( + metric=r"DRAM\\reads", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + + # dram writes (rel err) + serial_dram_writes = int(df.loc[threads_mask, "dram_writes_serial"].values[0]) + det_dram_writes = int(det["dram_writes_parallel"].values[0]) + det_rel_err = det["dram_writes_rel_err"].values[0] + nondet_values = [] + for interleave, n in interleave_n: + nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = nondet[nondet["input_run_ahead_parallel"] == n] + + nondet_dram_writes = int(nondet["dram_writes_parallel"].values[0]) + nondet_rel_err = nondet["dram_writes_rel_err"].values[0] + if all_benchmarks: + nondet_values.append( + ( + 100.0 * nondet_rel_err, + "${}\\%$".format( + plot.round_to_precision( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + else: + nondet_values.append( + ( + nondet_dram_writes, + "${} ({}\\%)$".format( + plot.human_format_thousands( + nondet_dram_writes, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if all_benchmarks + else ( + serial_dram_writes, + "${}$".format( + plot.human_format_thousands( + serial_dram_writes, + round_to=thousands_round_to, + variable_precision=variable_precision, + ) + ), + ) + ) + if all_benchmarks: + det_value = ( + 100.0 * det_rel_err, + "${}\\%$".format( + plot.round_to_precision( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + else: + det_value = ( + det_dram_writes, + "${} ({}\\%)$".format( + plot.human_format_thousands( + det_dram_writes, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + table_rows.append( + ParallelTableRow( + metric=r"DRAM\\writes", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + return table_rows + + @main.command() # @click.pass_context +@click.option("--path", help="Path to materialized benchmark config") @click.option("--bench", "bench_name", help="Benchmark name") @click.option("--nsight", "nsight", type=bool, is_flag=True, help="use nsight") -def parallel_plot(bench_name, nsight): - # load the materialized benchmark config +def parallel_plot(bench_name, path, nsight): profiler = "nsight" if nsight else "nvprof" - if bench_name is None: - stats_file = REPO_ROOT_DIR / "results/combined.stats.{}.csv".format(profiler) - else: - stats_file = REPO_ROOT_DIR / "results/combined.stats.{}.{}.csv".format( - profiler, bench_name - ) + all_benchmarks = bench_name is None - print("loading {}".format(stats_file)) - selected_df = pd.read_csv(stats_file, header=0) + selected_df = load_stats(bench_name=bench_name, profiler=profiler, path=path) selected_df = selected_df[selected_df["target"] == Target.Simulate.value] - selected_df = selected_df[selected_df["benchmark"] == bench_name] + if not all_benchmarks: + selected_df = selected_df[selected_df["benchmark"] == bench_name] + + num_benchmarks = len(selected_df["benchmark"].unique().tolist()) if not (selected_df["is_release_build"] == True).all(): print(color("WARNING: non release results:", fg="red")) @@ -246,7 +872,9 @@ def parallel_plot(bench_name, nsight): print("====") bench_cols = ["target", "benchmark"] - bench_input_cols = benchmarks.BENCHMARK_INPUT_COLS[bench_name] + bench_input_cols = ( + [] if all_benchmarks else benchmarks.BENCHMARK_INPUT_COLS[bench_name] + ) input_cols = benchmarks.SIMULATE_INPUT_COLS print(bench_input_cols) print(input_cols) @@ -263,10 +891,13 @@ def parallel_plot(bench_name, nsight): aggregations = { **{c: "mean" for c in sorted(serial.columns)}, + **{c: "first" for c in serial.columns if c.startswith("input_")}, **benchmarks.NON_NUMERIC_COLS, } aggregations = {col: agg for col, agg in aggregations.items() if col in serial} - aggregations = {col: agg for col, agg in aggregations.items() if col not in group_cols} + aggregations = { + col: agg for col, agg in aggregations.items() if col not in group_cols + } mean_serial = serial.groupby(group_cols).agg(aggregations).reset_index() metric_cols = ["cycles", "exec_time_sec", "l2_hit_rate", "l1_hit_rate"] @@ -302,14 +933,20 @@ def parallel_plot(bench_name, nsight): print( serial.loc[ serial["input_id"] == input_id, - bench_cols + ["kernel_launch_id"] + bench_input_cols + benchmarks.SIMULATE_INPUT_COLS, + bench_cols + + ["kernel_launch_id"] + + bench_input_cols + + benchmarks.SIMULATE_INPUT_COLS, ] ) print("parallel input", input_id) print( parallel.loc[ parallel["input_id"] == input_id, - bench_cols + ["kernel_launch_id"] + bench_input_cols + benchmarks.SIMULATE_INPUT_COLS, + bench_cols + + ["kernel_launch_id"] + + bench_input_cols + + benchmarks.SIMULATE_INPUT_COLS, ] ) break @@ -318,28 +955,37 @@ def parallel_plot(bench_name, nsight): # join based on input_cols, NOT based on mode joined = parallel.merge( serial, - on=bench_cols + ["kernel_launch_id"] + bench_input_cols + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS, + on=bench_cols + + ["kernel_launch_id"] + + bench_input_cols + + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS, how="left", suffixes=("_parallel", "_serial"), ) - print("joined={} parallel={} serial={}".format(joined.shape, parallel.shape, serial.shape)) + print( + "joined={} parallel={} serial={}".format( + joined.shape, parallel.shape, serial.shape + ) + ) assert joined.shape[0] == parallel.shape[0] assert "mean_blocks_per_sm_parallel" in joined assert "total_cores_parallel" in joined assert "cores_per_cluster_parallel" in joined - + if len(joined) == 0: raise ValueError("joined parallel and serial dataframe is empty") PREVIEW_COLS = sorted( - bench_cols - + ["kernel_launch_id"] - + bench_input_cols - + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS - + [c + "_parallel" for c in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] - + [c + "_parallel" for c in metric_cols] - + [c + "_serial" for c in metric_cols] - + ["input_id_serial", "input_id_parallel"] + list( + bench_cols + + ["kernel_launch_id"] + + bench_input_cols + + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS + + [c + "_parallel" for c in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] + + [c + "_parallel" for c in metric_cols] + + [c + "_serial" for c in metric_cols] + + ["input_id_serial", "input_id_parallel"] + ) ) if True: @@ -360,7 +1006,7 @@ def parallel_plot(bench_name, nsight): # ["benchmark"] + bench_input_cols + [c + "_parallel" for c in SIMULATE_EXECUTION_CONFIG_COLS] ].drop_duplicates() ) - + group_cols = sorted( bench_cols + bench_input_cols @@ -374,7 +1020,9 @@ def parallel_plot(bench_name, nsight): **{c + "_serial": agg for c, agg in benchmarks.NON_NUMERIC_COLS.items()}, } aggregations = {col: agg for col, agg in aggregations.items() if col in joined} - aggregations = {col: agg for col, agg in aggregations.items() if col not in group_cols} + aggregations = { + col: agg for col, agg in aggregations.items() if col not in group_cols + } if set(joined.columns.tolist()) - set(group_cols) != set(aggregations.keys()): pprint( @@ -392,7 +1040,7 @@ def rel_err(true_values, values): true_values = true_values.fillna(0.0) rel_err = (values - true_values).abs() / true_values rel_err = rel_err.fillna(0.0) - rel_err[rel_err == 0.0] = 0.0 + rel_err[rel_err == 0.0] = 0.0 return rel_err def rmse(true_values, values): @@ -454,7 +1102,7 @@ def mae(true_values, values): # grouped.apply(lambda df: print(df.reset_index()[ # bench_cols - # + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS + # + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS # + ["input_variant"] # + ["dram_writes_serial", "dram_writes_parallel"] # ])) @@ -536,20 +1184,26 @@ def mae(true_values, values): dict( name="matrixmul", inputs={ - **{"input_dtype": 32, "input_rows": 256}, + **{"input_dtype": 32, "input_rows": 512}, **functional_config, }, ), dict( name="simple_matrixmul", inputs={ - **{"input_dtype": 32, "input_m": 128, "input_n": 128, "input_p": 128}, + **{ + "input_dtype": 32, + "input_m": 512, + "input_n": 32, + "input_p": 512, + }, **functional_config, }, ), - ] + # print(aggregated[bench_input_cols].drop_duplicates()) + def compute_label(bench_config, df): benchmark = df["benchmark"] bench_input_cols = benchmarks.BENCHMARK_INPUT_COLS[benchmark] @@ -567,7 +1221,16 @@ def compute_label(bench_config, df): assert bench_config["inputs"]["input_cores_per_cluster"] == cores_per_cluster assert bench_config["inputs"]["input_num_clusters"] == num_clusters - print(df[["benchmark", "input_cores_per_cluster", "input_num_clusters", "total_cores_parallel"]]) + print( + df[ + [ + "benchmark", + "input_cores_per_cluster", + "input_num_clusters", + "total_cores_parallel", + ] + ] + ) assert total_cores == int(df[["total_cores_parallel"]].values[0]) match benchmark.lower(): @@ -607,384 +1270,527 @@ def compute_label(bench_config, df): ) return label - interleave_n = list(itertools.product([False, True], [5, 10])) + def write_table_row(row, bold_values=None): + if bold_values is None: + bold_values = set() + + def bold(v, formatted_v): + if v in bold_values: + formatted_v = formatted_v.strip() + is_math = formatted_v[0] == "$" and formatted_v[-1] == "$" + if is_math: + return r"\boldmath" + str(formatted_v) + else: + return r"\textbf{" + str(formatted_v) + "}" + return str(formatted_v) + + is_first_metric_row = row.threads == 4 + is_last_metric_row = row.threads == 8 + + table_row = "" + + # metric name + if is_first_metric_row: + table_row += r"\multirow{2}{*}{\shortstack[l]{" + str(row.metric) + r"}}" + + # threads + table_row += r" & $t=" + str(row.threads) + r"$ " + + # serial value + if row.serial_value is not None and is_first_metric_row: + table_row += ( + r" & \multirow{2}{*}{\shortstack[l]{" + + bold(row.serial_value[0], row.serial_value[1]) + + r"}} " + ) + else: + table_row += r" & " - table = "" + # deterministic value + if row.det_value is not None: + table_row += r" & " + bold(row.det_value[0], row.det_value[1]) + else: + table_row += r" & " - for bench_config in selected_benchmarks: - bench_inputs: typing.Dict[str, typing.Any] = bench_config["inputs"] - if not all(aggregated["benchmark"] == bench_config["name"]): - print(color("SKIP: want {} (have {})".format( - bench_config["name"], aggregated["benchmark"][0]), fg="red")) - continue + # nondeterministic value + for nondet_value, formatted_nondet_value in row.nondet_values: + table_row += r" & " + bold(nondet_value, formatted_nondet_value) + table_row += r" \\ " + if is_last_metric_row: + table_row += r" \hline " + table_row += "\n" + return table_row - print("==> {}".format(bench_config["name"])) - mask_cols = ["benchmark"] + list(bench_inputs.keys()) - mask_values = [bench_name] + list(bench_inputs.values()) + table = "" + # thousands_round_to = 1 + # variable_precision = True + + if all_benchmarks: + # mask_cols = ["benchmark"] + list(bench_inputs.keys()) + # mask_values = [bench_name] + list(bench_inputs.values()) # mask = aggregated["benchmark"] == bench_name # for col, value in zip(mask_cols, mask_values): # mask &= aggregated[col] == value # print((aggregated[mask_cols] == mask_values).sum(axis=0)) - - - mask = (aggregated[mask_cols] == mask_values).all(axis=1) - test_df = aggregated.loc[ - mask, - benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS - + bench_input_cols - + ["mean_blocks_per_sm_parallel"]] - test_df = test_df.drop_duplicates() - assert len(test_df) == 1 - - table += "%\n%\n" - table += ( - r"\rowcolor{gray!10} \multicolumn{8}{c}{\textbf{" - + str(compute_label(bench_config, aggregated.loc[mask].iloc[0])) - + r"}} \\ \hline" - + "\n" - ) - - def write_table_row(row, bold_values=None): - if bold_values is None: - bold_values = set() - - def bold(v, formatted_v): - if v in bold_values: - formatted_v = formatted_v.strip() - is_math = formatted_v[0] == "$" and formatted_v[-1] == "$" - if is_math: - return r"\boldmath" + str(formatted_v) - else: - return r"\textbf{" + str(formatted_v) + "}" - return str(formatted_v) - - is_first_metric_row = row.threads == 4 - is_last_metric_row = row.threads == 8 - - table_row = "" - # metric name - if is_first_metric_row: - table_row += ( - r"\multirow{2}{*}{\shortstack[l]{" + str(row.metric) + r"}}" - ) - # threads - table_row += r" & $t=" + str(row.threads) + r"$ " - # serial value - if is_first_metric_row: - table_row += ( - r" & \multirow{2}{*}{\shortstack[l]{" - + bold(row.serial_value[0], row.serial_value[1]) - + r"}} " - ) - else: - table_row += r" & " - # deterministic value - table_row += r" & " + bold(row.det_value[0], row.det_value[1]) - # nondeterministic value - for nondet_value, formatted_nondet_value in row.nondet_values: - table_row += r" & " + bold(nondet_value, formatted_nondet_value) - table_row += r" \\ " - if is_last_metric_row: - table_row += r" \hline " - table_row += "\n" - return table_row - - class TableRow(typing.NamedTuple): - metric: str - threads: int - serial_value: typing.Tuple[float, typing.Union[float, int, str]] - det_value: typing.Tuple[float, typing.Union[float, int, str]] - nondet_values: typing.Sequence[typing.Tuple[float, typing.Union[float, int, str]]] - - def values(self): - return [self.serial_value[0], self.det_value[0]] + [v[0] for v in self.nondet_values] - - table_rows: typing.Sequence[TableRow] = [] - - for threads in [4, 8]: - threads_mask = aggregated["input_threads_parallel"] == threads - det_mask = aggregated["input_mode_parallel"] == "deterministic" - nondet_no_interleave_mask = ( - aggregated["input_mode_parallel"] == "nondeterministic" - ) - nondet_interleave_mask = ( - aggregated["input_mode_parallel"] == "nondeterministic_interleave" + for functional_config in functional_configs: + mask_cols = list(functional_config.keys()) + mask_values = list(functional_config.values()) + mask = (aggregated[mask_cols] == mask_values).all(axis=1) + + # df = aggregated[mask] + # test_df = aggregated.loc[ + # mask, + # benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS + # + bench_input_cols + # + ["mean_blocks_per_sm_parallel"]] + # test_df = test_df.drop_duplicates() + # assert len(test_df) == 1 + + label = "Average @ {} SM's".format( # [{:.2f} CTA/SM]".format( + int(aggregated.loc[mask, "total_cores_parallel"].values[0]), + # float(aggregated.loc[mask, "mean_blocks_per_sm_parallel"].values[0]), ) - # print([m.sum() for m in [ - # mask, threads_mask, det_mask, nondet_no_interleave_mask, nondet_interleave_mask - # ]]) - det = aggregated[mask & threads_mask & det_mask] - print( - det[ - bench_input_cols - + [ - "input_threads_parallel", - "exec_time_sec_parallel", - "input_id_parallel", - "input_id_serial", - # "dram_reads_serial", - # "dram_reads_parallel", - # "dram_reads_rel_err", - "dram_writes_serial", - "dram_writes_parallel", - "dram_writes_rel_err", - ] + different_cols(det) - ] - ) - print("===") - assert len(det) == 1 - nondet_no_interleave = aggregated[ - mask & threads_mask & nondet_no_interleave_mask - ] - assert len(nondet_no_interleave) == 2 - nondet_interleave = aggregated[ - mask & threads_mask & nondet_interleave_mask - ] - assert len(nondet_interleave) == 2 - assert ( - len( - aggregated.loc[ - mask, - [ - "exec_time_sec_serial", - "cycles_serial", - "input_id_serial", - ], - ].drop_duplicates() - ) - == 1 + table += "%\n%\n" + table += ( + r"\rowcolor{gray!10} \multicolumn{8}{c}{\textbf{" + + label + + r"}} \\ \hline" + + "\n" ) - thousands_round_to = 1 - variable_precision = True - - # exec time (speedup) - serial_exec_time = aggregated.loc[ - mask & threads_mask, "exec_time_sec_serial" - ].values[0] - det_exec_time = det["exec_time_sec_parallel"].values[0] - det_speedup = det["exec_time_sec_speedup"].values[0] - nondet_values = [] - for interleave, n in interleave_n: - nondet = nondet_interleave if interleave else nondet_no_interleave - nondet = nondet[nondet["input_run_ahead_parallel"] == n] - nondet_exec_time = nondet["exec_time_sec_parallel"].values[0] - nondet_speedup = nondet["exec_time_sec_speedup"].values[0] - nondet_values.append( - (nondet_exec_time, "${:>3.1f}s~({}x)$".format( - nondet_exec_time, - plot.round_to_precision(nondet_speedup, round_to=1, variable_precision=variable_precision) - )) - ) - - table_rows.append( - TableRow( - metric=r"exec\\time", - threads=threads, - serial_value=(serial_exec_time, "${:>3.1f}s$".format(serial_exec_time)), - det_value=(det_exec_time, "${:>3.1f}s~({}x)$".format( - det_exec_time, - plot.round_to_precision(det_speedup, round_to=1, - variable_precision=variable_precision) - )), - nondet_values=nondet_values, - ) + table_rows: typing.Sequence[ParallelTableRow] = build_parallel_table_rows( + aggregated[mask], num_benchmarks=num_benchmarks, all_benchmarks=True ) - # cycles (rel err) - serial_cycles = int(aggregated.loc[mask & threads_mask, "cycles_serial"].values[0]) - det_cycles = int(det["cycles_parallel"].values[0]) - det_rel_err = det["cycles_rel_err"].values[0] - nondet_values = [] - for interleave, n in interleave_n: - nondet = nondet_interleave if interleave else nondet_no_interleave - nondet = nondet[nondet["input_run_ahead_parallel"] == n] - - nondet_cycles = int(nondet["cycles_parallel"].values[0]) - nondet_rel_err = nondet["cycles_rel_err"].values[0] - nondet_values.append( - (nondet_cycles, "${} ({}\\%)$".format( - plot.human_format_thousands(nondet_cycles, round_to=thousands_round_to, variable_precision=variable_precision), - plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, - variable_precision=variable_precision) - - )) - ) + # for threads in [4, 8]: + # threads_mask = aggregated["input_threads_parallel"] == threads + # det_mask = aggregated["input_mode_parallel"] == "deterministic" + # nondet_no_interleave_mask = ( + # aggregated["input_mode_parallel"] == "nondeterministic" + # ) + # nondet_interleave_mask = ( + # aggregated["input_mode_parallel"] == "nondeterministic_interleave" + # ) + # + # det = aggregated[mask & threads_mask & det_mask] + # + # # det_preview = det[ + # # PREVIEW_COLS + # # + ["input_threads_parallel", "cycles_rel_err", "exec_time_sec_speedup"] + # # ] + # + # print("===") + # nondet_no_interleave = aggregated[ + # mask & threads_mask & nondet_no_interleave_mask + # ] + # nondet_interleave = aggregated[ + # mask & threads_mask & nondet_interleave_mask + # ] + # + # assert len(det) == num_benchmarks + # assert len(nondet_interleave) == 2 * num_benchmarks + # assert len(nondet_no_interleave) == 2 * num_benchmarks + # + # # exec time (speedup) + # det_speedup = det["exec_time_sec_speedup"].values[0] + # nondet_values = [] + # for interleave, n in interleave_n: + # nondet = nondet_interleave if interleave else nondet_no_interleave + # nondet = nondet[nondet["input_run_ahead_parallel"] == n] + # nondet_speedup = nondet["exec_time_sec_speedup"].values[0] + # nondet_values.append( + # (nondet_speedup, "${}x$".format( + # plot.round_to_precision(nondet_speedup, round_to=1, variable_precision=variable_precision) + # )) + # ) + # + # table_rows.append( + # TableRow( + # metric=r"exec\\time", + # threads=threads, + # serial_value=None, + # det_value=(det_speedup, "${}x$".format( + # plot.round_to_precision(det_speedup, round_to=1, + # variable_precision=variable_precision) + # )), + # nondet_values=nondet_values, + # ) + # ) + # + # # cycles (rel err) + # serial_cycles = int(aggregated.loc[mask & threads_mask, "cycles_serial"].values[0]) + # det_cycles = int(det["cycles_parallel"].values[0]) + # det_rel_err = det["cycles_rel_err"].values[0] + # nondet_values = [] + # for interleave, n in interleave_n: + # nondet = nondet_interleave if interleave else nondet_no_interleave + # nondet = nondet[nondet["input_run_ahead_parallel"] == n] + # + # nondet_cycles = int(nondet["cycles_parallel"].values[0]) + # nondet_rel_err = nondet["cycles_rel_err"].values[0] + # nondet_values.append( + # (nondet_cycles, "${} ({}\\%)$".format( + # plot.human_format_thousands(nondet_cycles, round_to=thousands_round_to, variable_precision=variable_precision), + # plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, + # variable_precision=variable_precision) + # + # )) + # ) + # + # table_rows.append( + # TableRow( + # metric="cycles", + # threads=threads, + # serial_value=(serial_cycles, "${}$".format(plot.human_format_thousands(serial_cycles, round_to=thousands_round_to, variable_precision=variable_precision))), + # det_value=(det_cycles, "${} ({}\\%)$".format( + # plot.human_format_thousands(det_cycles, round_to=thousands_round_to, variable_precision=variable_precision), + # plot.round_to_precision(100.0 * det_rel_err, round_to=1, + # variable_precision=variable_precision) + # + # )), + # nondet_values=nondet_values, + # ) + # ) + + table += "%\n%\n" + + table_rows = sorted(table_rows, key=lambda row: (row.metric, row.threads)) + for row in table_rows: + bold_values = [] + if row.metric == r"exec\\time": + bold_values = [np.amin(row.values())] + print(row.metric, bold_values, row.values()) + table += write_table_row(row, bold_values) - table_rows.append( - TableRow( - metric="cycles", - threads=threads, - serial_value=(serial_cycles, "${}$".format(plot.human_format_thousands(serial_cycles, round_to=thousands_round_to, variable_precision=variable_precision))), - det_value=(det_cycles, "${} ({}\\%)$".format( - plot.human_format_thousands(det_cycles, round_to=thousands_round_to, variable_precision=variable_precision), - plot.round_to_precision(100.0 * det_rel_err, round_to=1, - variable_precision=variable_precision) - - )), - nondet_values=nondet_values, - ) - ) - - # l1 data hit rate (rel err) - serial_l1_hit_rate = aggregated.loc[mask & threads_mask, "l1_hit_rate_serial"].values[0] - det_l1_hit_rate = det["l1_hit_rate_parallel"].values[0] - det_rel_err = det["l1_hit_rate_rel_err"].values[0] - nondet_values = [] - for interleave, n in interleave_n: - nondet = nondet_interleave if interleave else nondet_no_interleave - nondet = nondet[nondet["input_run_ahead_parallel"] == n] - - nondet_l1_hit_rate = nondet["l1_hit_rate_parallel"].values[0] - nondet_rel_err = nondet["l1_hit_rate_rel_err"].values[0] - nondet_values.append( - (100.0 * nondet_l1_hit_rate, "${}\\%~({}\\%)$".format( - plot.round_to_precision(100.0 * nondet_l1_hit_rate, round_to=1, - variable_precision=variable_precision), - plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, - variable_precision=variable_precision), - )) - ) - - table_rows.append( - TableRow( - metric=r"L1D\\hit rate", - threads=threads, - serial_value=(100.0 * serial_l1_hit_rate, "${:>2.1f}\\%$".format(100.0 * serial_l1_hit_rate)), - det_value=(100.0 * det_l1_hit_rate, "${}\\%~({}\\%)$".format( - plot.round_to_precision(100.0 * det_l1_hit_rate, round_to=1, - variable_precision=variable_precision), - plot.round_to_precision(100.0 * det_rel_err, round_to=1, - variable_precision=variable_precision), - )), - nondet_values=nondet_values, - ) - ) - - # l2 data hit rate (rel err) - serial_l2_hit_rate = aggregated.loc[mask & threads_mask, "l2_hit_rate_serial"].values[0] - det_l2_hit_rate = det["l2_hit_rate_parallel"].values[0] - det_rel_err = det["l2_hit_rate_rel_err"].values[0] - nondet_values = [] - for interleave, n in interleave_n: - nondet = nondet_interleave if interleave else nondet_no_interleave - nondet = nondet[nondet["input_run_ahead_parallel"] == n] - - nondet_l2_hit_rate = nondet["l2_hit_rate_parallel"].values[0] - nondet_rel_err = nondet["l2_hit_rate_rel_err"].values[0] - nondet_values.append( - (100.0 * nondet_l2_hit_rate, "${}\\%~({}\\%)$".format( - plot.round_to_precision(100.0 * nondet_l2_hit_rate, round_to=1, - variable_precision=variable_precision), - plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, - variable_precision=variable_precision), - )) - ) - - table_rows.append( - TableRow( - metric=r"L2D\\hit rate", - threads=threads, - serial_value=( - 100.0 * serial_l2_hit_rate, - "${}\\%$".format( - plot.round_to_precision( - 100.0 * serial_l2_hit_rate, - round_to=1, variable_precision=variable_precision) - )), - det_value=(100.0 * det_l2_hit_rate, "${}\\%~({}\\%)$".format( - plot.round_to_precision(100.0 * det_l2_hit_rate, round_to=1, - variable_precision=variable_precision), - plot.round_to_precision(100.0 * det_rel_err, round_to=1, - variable_precision=variable_precision), - )), - nondet_values=nondet_values, - ) - ) - - # dram reads (rel err) - serial_dram_reads = int(aggregated.loc[mask & threads_mask, "dram_reads_serial"].values[0]) - det_dram_reads = int(det["dram_reads_parallel"].values[0]) - det_rel_err = det["dram_reads_rel_err"].values[0] - nondet_values = [] - for interleave, n in interleave_n: - nondet = nondet_interleave if interleave else nondet_no_interleave - nondet = nondet[nondet["input_run_ahead_parallel"] == n] - - nondet_dram_reads = int(nondet["dram_reads_parallel"].values[0]) - nondet_rel_err = nondet["dram_reads_rel_err"].values[0] - nondet_values.append( - (nondet_dram_reads, "${} ({}\\%)$".format( - plot.human_format_thousands(nondet_dram_reads, round_to=thousands_round_to, variable_precision=variable_precision), - plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, - variable_precision=variable_precision), - - )) - ) - - table_rows.append( - TableRow( - metric=r"DRAM\\reads", - threads=threads, - serial_value=(serial_dram_reads, "${}$".format(plot.human_format_thousands(serial_dram_reads, round_to=thousands_round_to, variable_precision=variable_precision))), - det_value=(det_dram_reads, "${} ({}\\%)$".format( - plot.human_format_thousands(det_dram_reads, round_to=thousands_round_to, variable_precision=variable_precision), - plot.round_to_precision(100.0 * det_rel_err, round_to=1, - variable_precision=variable_precision), - - )), - nondet_values=nondet_values, + else: + for bench_config in selected_benchmarks: + bench_inputs: typing.Dict[str, typing.Any] = bench_config["inputs"] + if not all(aggregated["benchmark"] == bench_config["name"]): + print( + color( + "SKIP: want {} (have {})".format( + bench_config["name"], aggregated["benchmark"][0] + ), + fg="red", + ) ) + continue + + print("==> {}".format(bench_config["name"])) + mask_cols = ["benchmark"] + list(bench_inputs.keys()) + mask_values = [bench_name] + list(bench_inputs.values()) + # mask = aggregated["benchmark"] == bench_name + # for col, value in zip(mask_cols, mask_values): + # mask &= aggregated[col] == value + # print((aggregated[mask_cols] == mask_values).sum(axis=0)) + + mask = (aggregated[mask_cols] == mask_values).all(axis=1) + test_df = aggregated.loc[ + mask, + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS + + bench_input_cols + + ["mean_blocks_per_sm_parallel"], + ] + test_df = test_df.drop_duplicates() + print(test_df) + assert len(test_df) == 1 + + table += "%\n%\n" + table += ( + r"\rowcolor{gray!10} \multicolumn{8}{c}{\textbf{" + + str(compute_label(bench_config, aggregated.loc[mask].iloc[0])) + + r"}} \\ \hline" + + "\n" ) - # dram writes (rel err) - serial_dram_writes = int( - aggregated.loc[mask & threads_mask, "dram_writes_serial"].values[0] + table_rows: typing.Sequence[ParallelTableRow] = build_parallel_table_rows( + aggregated[mask], num_benchmarks=num_benchmarks, all_benchmarks=False ) - det_dram_writes = int(det["dram_writes_parallel"].values[0]) - det_rel_err = det["dram_writes_rel_err"].values[0] - nondet_values = [] - for interleave, n in interleave_n: - nondet = nondet_interleave if interleave else nondet_no_interleave - nondet = nondet[nondet["input_run_ahead_parallel"] == n] - - nondet_dram_writes = int(nondet["dram_writes_parallel"].values[0]) - nondet_rel_err = nondet["dram_writes_rel_err"].values[0] - nondet_values.append( - (nondet_dram_writes, "${} ({}\\%)$".format( - plot.human_format_thousands(nondet_dram_writes, round_to=thousands_round_to, variable_precision=variable_precision), - plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, - variable_precision=variable_precision), - - )) - ) - - table_rows.append( - TableRow( - metric=r"DRAM\\writes", - threads=threads, - serial_value=(serial_dram_writes, "${}$".format(plot.human_format_thousands(serial_dram_writes, round_to=thousands_round_to, variable_precision=variable_precision))), - # serial_value="${:>4}$".format(), - det_value=(det_dram_writes, "${} ({}\\%)$".format( - plot.human_format_thousands(det_dram_writes, round_to=thousands_round_to, variable_precision=variable_precision), - plot.round_to_precision(100.0 * det_rel_err, round_to=1, - variable_precision=variable_precision), - - )), - nondet_values=nondet_values, - ) - ) - - table += "%\n%\n" - - table_rows = sorted(table_rows, key=lambda row: (row.metric, row.threads)) - for row in table_rows: - bold_values = [] - if row.metric == r"exec\\time": - bold_values = [np.amin(row.values())] - print(row.metric, bold_values, row.values()) - table += write_table_row(row, bold_values) + # table_rows: typing.Sequence[TableRow] = [] + + # for threads in [4, 8]: + # threads_mask = aggregated["input_threads_parallel"] == threads + # det_mask = aggregated["input_mode_parallel"] == "deterministic" + # nondet_no_interleave_mask = ( + # aggregated["input_mode_parallel"] == "nondeterministic" + # ) + # nondet_interleave_mask = ( + # aggregated["input_mode_parallel"] == "nondeterministic_interleave" + # ) + # # print([m.sum() for m in [ + # # mask, threads_mask, det_mask, nondet_no_interleave_mask, nondet_interleave_mask + # # ]]) + # + # det = aggregated[mask & threads_mask & det_mask] + # print( + # det[ + # bench_input_cols + # + [ + # "input_threads_parallel", + # "exec_time_sec_parallel", + # "input_id_parallel", + # "input_id_serial", + # # "dram_reads_serial", + # # "dram_reads_parallel", + # # "dram_reads_rel_err", + # "dram_writes_serial", + # "dram_writes_parallel", + # "dram_writes_rel_err", + # ] + different_cols(det) + # ] + # ) + # print("===") + # nondet_no_interleave = aggregated[ + # mask & threads_mask & nondet_no_interleave_mask + # ] + # nondet_interleave = aggregated[ + # mask & threads_mask & nondet_interleave_mask + # ] + # + # assert len(det) == num_benchmarks + # assert len(nondet_no_interleave) == 2 * num_benchmarks + # assert len(nondet_interleave) == 2 * num_benchmarks + # assert ( + # len( + # aggregated.loc[ + # mask, + # [ + # "exec_time_sec_serial", + # "cycles_serial", + # "input_id_serial", + # ], + # ].drop_duplicates() + # ) + # == 1 + # ) + # + # # exec time (speedup) + # serial_exec_time = aggregated.loc[ + # mask & threads_mask, "exec_time_sec_serial" + # ].values[0] + # det_exec_time = det["exec_time_sec_parallel"].values[0] + # det_speedup = det["exec_time_sec_speedup"].values[0] + # nondet_values = [] + # for interleave, n in interleave_n: + # nondet = nondet_interleave if interleave else nondet_no_interleave + # nondet = nondet[nondet["input_run_ahead_parallel"] == n] + # nondet_exec_time = nondet["exec_time_sec_parallel"].values[0] + # nondet_speedup = nondet["exec_time_sec_speedup"].values[0] + # nondet_values.append( + # (nondet_exec_time, "${:>3.1f}s~({}x)$".format( + # nondet_exec_time, + # plot.round_to_precision(nondet_speedup, round_to=1, variable_precision=variable_precision) + # )) + # ) + # + # table_rows.append( + # TableRow( + # metric=r"exec\\time", + # threads=threads, + # serial_value=(serial_exec_time, "${:>3.1f}s$".format(serial_exec_time)), + # det_value=(det_exec_time, "${:>3.1f}s~({}x)$".format( + # det_exec_time, + # plot.round_to_precision(det_speedup, round_to=1, + # variable_precision=variable_precision) + # )), + # nondet_values=nondet_values, + # ) + # ) + # + # # cycles (rel err) + # serial_cycles = int(aggregated.loc[mask & threads_mask, "cycles_serial"].values[0]) + # det_cycles = int(det["cycles_parallel"].values[0]) + # det_rel_err = det["cycles_rel_err"].values[0] + # nondet_values = [] + # for interleave, n in interleave_n: + # nondet = nondet_interleave if interleave else nondet_no_interleave + # nondet = nondet[nondet["input_run_ahead_parallel"] == n] + # + # nondet_cycles = int(nondet["cycles_parallel"].values[0]) + # nondet_rel_err = nondet["cycles_rel_err"].values[0] + # nondet_values.append( + # (nondet_cycles, "${} ({}\\%)$".format( + # plot.human_format_thousands(nondet_cycles, round_to=thousands_round_to, variable_precision=variable_precision), + # plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, + # variable_precision=variable_precision) + # + # )) + # ) + # + # table_rows.append( + # TableRow( + # metric="cycles", + # threads=threads, + # serial_value=(serial_cycles, "${}$".format(plot.human_format_thousands(serial_cycles, round_to=thousands_round_to, variable_precision=variable_precision))), + # det_value=(det_cycles, "${} ({}\\%)$".format( + # plot.human_format_thousands(det_cycles, round_to=thousands_round_to, variable_precision=variable_precision), + # plot.round_to_precision(100.0 * det_rel_err, round_to=1, + # variable_precision=variable_precision) + # + # )), + # nondet_values=nondet_values, + # ) + # ) + # + # # l1 data hit rate (rel err) + # serial_l1_hit_rate = aggregated.loc[mask & threads_mask, "l1_hit_rate_serial"].values[0] + # det_l1_hit_rate = det["l1_hit_rate_parallel"].values[0] + # det_rel_err = det["l1_hit_rate_rel_err"].values[0] + # nondet_values = [] + # for interleave, n in interleave_n: + # nondet = nondet_interleave if interleave else nondet_no_interleave + # nondet = nondet[nondet["input_run_ahead_parallel"] == n] + # + # nondet_l1_hit_rate = nondet["l1_hit_rate_parallel"].values[0] + # nondet_rel_err = nondet["l1_hit_rate_rel_err"].values[0] + # nondet_values.append( + # (100.0 * nondet_l1_hit_rate, "${}\\%~({}\\%)$".format( + # plot.round_to_precision(100.0 * nondet_l1_hit_rate, round_to=1, + # variable_precision=variable_precision), + # plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, + # variable_precision=variable_precision), + # )) + # ) + # + # table_rows.append( + # TableRow( + # metric=r"L1D\\hit rate", + # threads=threads, + # serial_value=(100.0 * serial_l1_hit_rate, "${:>2.1f}\\%$".format(100.0 * serial_l1_hit_rate)), + # det_value=(100.0 * det_l1_hit_rate, "${}\\%~({}\\%)$".format( + # plot.round_to_precision(100.0 * det_l1_hit_rate, round_to=1, + # variable_precision=variable_precision), + # plot.round_to_precision(100.0 * det_rel_err, round_to=1, + # variable_precision=variable_precision), + # )), + # nondet_values=nondet_values, + # ) + # ) + # + # # l2 data hit rate (rel err) + # serial_l2_hit_rate = aggregated.loc[mask & threads_mask, "l2_hit_rate_serial"].values[0] + # det_l2_hit_rate = det["l2_hit_rate_parallel"].values[0] + # det_rel_err = det["l2_hit_rate_rel_err"].values[0] + # nondet_values = [] + # for interleave, n in interleave_n: + # nondet = nondet_interleave if interleave else nondet_no_interleave + # nondet = nondet[nondet["input_run_ahead_parallel"] == n] + # + # nondet_l2_hit_rate = nondet["l2_hit_rate_parallel"].values[0] + # nondet_rel_err = nondet["l2_hit_rate_rel_err"].values[0] + # nondet_values.append( + # (100.0 * nondet_l2_hit_rate, "${}\\%~({}\\%)$".format( + # plot.round_to_precision(100.0 * nondet_l2_hit_rate, round_to=1, + # variable_precision=variable_precision), + # plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, + # variable_precision=variable_precision), + # )) + # ) + # + # table_rows.append( + # TableRow( + # metric=r"L2D\\hit rate", + # threads=threads, + # serial_value=( + # 100.0 * serial_l2_hit_rate, + # "${}\\%$".format( + # plot.round_to_precision( + # 100.0 * serial_l2_hit_rate, + # round_to=1, variable_precision=variable_precision) + # )), + # det_value=(100.0 * det_l2_hit_rate, "${}\\%~({}\\%)$".format( + # plot.round_to_precision(100.0 * det_l2_hit_rate, round_to=1, + # variable_precision=variable_precision), + # plot.round_to_precision(100.0 * det_rel_err, round_to=1, + # variable_precision=variable_precision), + # )), + # nondet_values=nondet_values, + # ) + # ) + # + # # dram reads (rel err) + # serial_dram_reads = int(aggregated.loc[mask & threads_mask, "dram_reads_serial"].values[0]) + # det_dram_reads = int(det["dram_reads_parallel"].values[0]) + # det_rel_err = det["dram_reads_rel_err"].values[0] + # nondet_values = [] + # for interleave, n in interleave_n: + # nondet = nondet_interleave if interleave else nondet_no_interleave + # nondet = nondet[nondet["input_run_ahead_parallel"] == n] + # + # nondet_dram_reads = int(nondet["dram_reads_parallel"].values[0]) + # nondet_rel_err = nondet["dram_reads_rel_err"].values[0] + # nondet_values.append( + # (nondet_dram_reads, "${} ({}\\%)$".format( + # plot.human_format_thousands(nondet_dram_reads, round_to=thousands_round_to, variable_precision=variable_precision), + # plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, + # variable_precision=variable_precision), + # + # )) + # ) + # + # table_rows.append( + # TableRow( + # metric=r"DRAM\\reads", + # threads=threads, + # serial_value=(serial_dram_reads, "${}$".format(plot.human_format_thousands(serial_dram_reads, round_to=thousands_round_to, variable_precision=variable_precision))), + # det_value=(det_dram_reads, "${} ({}\\%)$".format( + # plot.human_format_thousands(det_dram_reads, round_to=thousands_round_to, variable_precision=variable_precision), + # plot.round_to_precision(100.0 * det_rel_err, round_to=1, + # variable_precision=variable_precision), + # + # )), + # nondet_values=nondet_values, + # ) + # ) + # + # # dram writes (rel err) + # serial_dram_writes = int( + # aggregated.loc[mask & threads_mask, "dram_writes_serial"].values[0] + # ) + # det_dram_writes = int(det["dram_writes_parallel"].values[0]) + # det_rel_err = det["dram_writes_rel_err"].values[0] + # nondet_values = [] + # for interleave, n in interleave_n: + # nondet = nondet_interleave if interleave else nondet_no_interleave + # nondet = nondet[nondet["input_run_ahead_parallel"] == n] + # + # nondet_dram_writes = int(nondet["dram_writes_parallel"].values[0]) + # nondet_rel_err = nondet["dram_writes_rel_err"].values[0] + # nondet_values.append( + # (nondet_dram_writes, "${} ({}\\%)$".format( + # plot.human_format_thousands(nondet_dram_writes, round_to=thousands_round_to, variable_precision=variable_precision), + # plot.round_to_precision(100.0 * nondet_rel_err, round_to=1, + # variable_precision=variable_precision), + # + # )) + # ) + # + # table_rows.append( + # TableRow( + # metric=r"DRAM\\writes", + # threads=threads, + # serial_value=(serial_dram_writes, "${}$".format(plot.human_format_thousands(serial_dram_writes, round_to=thousands_round_to, variable_precision=variable_precision))), + # # serial_value="${:>4}$".format(), + # det_value=(det_dram_writes, "${} ({}\\%)$".format( + # plot.human_format_thousands(det_dram_writes, round_to=thousands_round_to, variable_precision=variable_precision), + # plot.round_to_precision(100.0 * det_rel_err, round_to=1, + # variable_precision=variable_precision), + # + # )), + # nondet_values=nondet_values, + # ) + # ) + + table += "%\n%\n" + + table_rows = sorted(table_rows, key=lambda row: (row.metric, row.threads)) + for row in table_rows: + bold_values = [] + if row.metric == r"exec\\time": + bold_values = [np.amin(row.values())] + print(row.metric, bold_values, row.values()) + table += write_table_row(row, bold_values) print(table) utils.copy_to_clipboard(table) @@ -994,27 +1800,46 @@ def values(self): def flatten(l): return [item for ll in l for item in ll] -@main.command() -@click.option("--path", help="Path to materialized benchmark config") -@click.option("--bench", "bench_name_arg", help="Benchmark name") -@click.option("--nsight", "nsight", type=bool, is_flag=True, help="use nsight") -def correlation_plots(path, bench_name_arg, nsight): - profiler = "nsight" if nsight else "nvprof" + +def load_stats(bench_name, profiler="nvprof", path=None) -> pd.DataFrame: stats = [] - if bench_name_arg is not None: - stats.append(pd.read_csv(REPO_ROOT_DIR / "results/combined.stats.{}.{}.csv".format( - profiler, bench_name_arg - ))) + if bench_name is not None: + stats_file = REPO_ROOT_DIR / "results/combined.stats.{}.{}.csv".format( + profiler, bench_name + ) + print("loading {}".format(stats_file)) + df = pd.read_csv(stats_file, header=0) + if len(df) < 1: + print(color("WARNING: {} is empty!".format(stats_file), fg="red")) + else: + stats.append(df) else: b = Benchmarks(path) benches = flatten(list(b.benchmarks[Target.Profile.value].values())) bench_names = set([b["name"] for b in benches]) for bench_name in bench_names: - stats.append(pd.read_csv(REPO_ROOT_DIR / "results/combined.stats.{}.{}.csv".format( + stats_file = REPO_ROOT_DIR / "results/combined.stats.{}.{}.csv".format( profiler, bench_name - ))) - stats = pd.concat(stats, ignore_index=False) - stats = stats.sort_values(["benchmark", "target"]) + ) + print("loading {}".format(stats_file)) + df = pd.read_csv(stats_file, header=0) + if len(df) < 1: + print(color("WARNING: {} is empty!".format(stats_file), fg="red")) + else: + stats.append(df) + + stats_df = pd.concat(stats, ignore_index=False) + stats_df = stats_df.sort_values(["benchmark", "target"]) + return stats_df + + +@main.command() +@click.option("--path", help="Path to materialized benchmark config") +@click.option("--bench", "bench_name_arg", help="Benchmark name") +@click.option("--nsight", "nsight", type=bool, is_flag=True, help="use nsight") +def correlation_plots(path, bench_name_arg, nsight): + profiler = "nsight" if nsight else "nvprof" + stats = load_stats(bench_name=bench_name_arg, profiler=profiler, path=path) print(stats.shape) stat_cols = stat_cols_for_profiler(profiler) @@ -1024,7 +1849,6 @@ def correlation_plots(path, bench_name_arg, nsight): stat_cols += ["instructions", "l2_accesses", "dram_reads", "dram_writes"] for stat_col in stat_cols: - stat_config = STAT_CONFIGS.get(stat_col) or StatConfig( **{**DEFAULT_STAT_CONFIG._asdict(), **dict(label=stat_col)} ) @@ -1038,7 +1862,9 @@ def correlation_plots(path, bench_name_arg, nsight): print(bench_name) bench_input_cols = benchmarks.BENCHMARK_INPUT_COLS[bench_name] - bench_df = bench_df.set_index(["target"] + benchmarks.SIMULATE_INPUT_COLS).sort_index() + bench_df = bench_df.set_index( + ["target"] + benchmarks.SIMULATE_INPUT_COLS + ).sort_index() def gpucachesim_baseline(target, memory_only=False): # "input_mode", "input_threads", "input_run_ahead", @@ -1051,7 +1877,9 @@ def gpucachesim_baseline(target, memory_only=False): **{c: "mean" for c in set(bench_df.columns) - set(group_cols)}, **benchmarks.NON_NUMERIC_COLS, } - aggregations = {col: agg for col, agg in aggregations.items() if col in bench_df} + aggregations = { + col: agg for col, agg in aggregations.items() if col in bench_df + } native = bench_df.loc[Target.Profile.value] native = native.groupby(bench_input_cols).agg(aggregations) @@ -1059,14 +1887,24 @@ def gpucachesim_baseline(target, memory_only=False): accelsim = bench_df.loc[Target.AccelsimSimulate.value] accelsim = accelsim.groupby(bench_input_cols).agg(aggregations) - gpucachesim = bench_df.loc[gpucachesim_baseline(target=Target.Simulate.value, memory_only=False)] + gpucachesim = bench_df.loc[ + gpucachesim_baseline(target=Target.Simulate.value, memory_only=False) + ] gpucachesim = gpucachesim.groupby(bench_input_cols).agg(aggregations) - gpucachesim_memory_only = bench_df.loc[gpucachesim_baseline(Target.Simulate.value, memory_only=True)] - gpucachesim_memory_only = gpucachesim_memory_only.groupby(bench_input_cols).agg(aggregations) + gpucachesim_memory_only = bench_df.loc[ + gpucachesim_baseline(Target.Simulate.value, memory_only=True) + ] + gpucachesim_memory_only = gpucachesim_memory_only.groupby( + bench_input_cols + ).agg(aggregations) - gpucachesim_trace_reconstruction = bench_df.loc[Target.ExecDrivenSimulate.value] - gpucachesim_trace_reconstruction = gpucachesim_trace_reconstruction.groupby(bench_input_cols).agg(aggregations) + gpucachesim_trace_reconstruction = bench_df.loc[ + Target.ExecDrivenSimulate.value + ] + gpucachesim_trace_reconstruction = gpucachesim_trace_reconstruction.groupby( + bench_input_cols + ).agg(aggregations) print("native ", native.shape) print("accelsim ", accelsim.shape) @@ -1074,16 +1912,23 @@ def gpucachesim_baseline(target, memory_only=False): print("gpucachesim (mem only) ", gpucachesim_memory_only.shape) print("gpucachesim (exec driven) ", gpucachesim_trace_reconstruction.shape) - targets = [ (("native", "native", "o"), native), (("AccelSim", "accelsim", "o"), accelsim), (("gpucachesim", "gpucachesim", "o"), gpucachesim), - (("gpucachesim (memory only)", "gpucachesim", "x"), gpucachesim_memory_only), - (("gpucachesim (exec driven)", "gpucachesim", "D"), gpucachesim_trace_reconstruction), + ( + ("gpucachesim (memory only)", "gpucachesim", "x"), + gpucachesim_memory_only, + ), + ( + ("gpucachesim (exec driven)", "gpucachesim", "D"), + gpucachesim_trace_reconstruction, + ), ] - assert all([len(target_df) == len(targets[0][1]) for _, target_df in targets]) - + assert all( + [len(target_df) == len(targets[0][1]) for _, target_df in targets] + ) + plt.rcParams.update({"font.size": fontsize, "font.family": font_family}) fig = plt.figure( figsize=(0.5 * plot.DINA4_WIDTH_INCHES, 0.5 * plot.DINA4_WIDTH_INCHES), @@ -1115,14 +1960,17 @@ def gpucachesim_baseline(target, memory_only=False): stat_col_min = all_targets_df[stat_col].min() stat_col_max = all_targets_df[stat_col].max() - + if stat_config.log_y_axis: log_stat_col_max = np.ceil(np.log10(stat_col_max)) - stat_col_max = 10 ** log_stat_col_max + stat_col_max = 10**log_stat_col_max log_stat_col_min = np.floor(np.log10(stat_col_min)) - stat_col_min = 10 ** log_stat_col_min - tick_values = np.arange(log_stat_col_min, log_stat_col_max, - step=int(np.ceil(log_stat_col_max / 6))) + stat_col_min = 10**log_stat_col_min + tick_values = np.arange( + log_stat_col_min, + log_stat_col_max, + step=int(np.ceil(log_stat_col_max / 6)), + ) tick_values = np.power(10, tick_values) xyrange = np.arange(1, stat_col_max) @@ -1131,7 +1979,7 @@ def gpucachesim_baseline(target, memory_only=False): else: xyrange = np.arange(stat_col_min, stat_col_max, step=1) tick_values = np.linspace(stat_col_min, stat_col_max, 6) - + ax.plot( xyrange, xyrange, @@ -1149,8 +1997,10 @@ def gpucachesim_baseline(target, memory_only=False): zorder=1, ) - tick_labels = [plot.human_format_thousands(v, round_to=0) for v in tick_values] - + tick_labels = [ + plot.human_format_thousands(v, round_to=0) for v in tick_values + ] + ax.set_ylabel(ylabel) ax.set_xlabel(xlabel) ax.set_xticks(tick_values, tick_labels) @@ -1165,10 +2015,12 @@ def gpucachesim_baseline(target, memory_only=False): filename.parent.mkdir(parents=True, exist_ok=True) fig.savefig(filename) - # create one plot for all benchmarks if bench_name_arg is not None: - bench_df = stats.set_index(["target"] + benchmarks.SIMULATE_INPUT_COLS).sort_index() + bench_df = stats.set_index( + ["target"] + benchmarks.SIMULATE_INPUT_COLS + ).sort_index() + def stat_cols_for_profiler(profiler: str) -> typing.Sequence[str]: stat_cols = [ @@ -1220,6 +2072,7 @@ class StatConfig(typing.NamedTuple): grid: bool percent: bool + DEFAULT_STAT_CONFIG = StatConfig( label="", log_y_axis=False, @@ -1412,7 +2265,8 @@ def compute_label(df): label = "Transpose\n" label += "{}\n".format(df["input_variant"]) label += "{}x{}".format( - int(df["input_dim"]), int(df["input_dim"]), + int(df["input_dim"]), + int(df["input_dim"]), ) case "babelstream": label = "BabelStream\n" @@ -1432,7 +2286,7 @@ def compute_target_name(name): return "AccelSim" case "profile": return "Native" - + per_kernel["label"] = per_kernel.apply(compute_label, axis=1) per_kernel["target_name"] = per_kernel["target"].apply(compute_target_name) @@ -1521,7 +2375,9 @@ def compute_target_name(name): stat_col, target_name, target_idx, - str(inputs[benchmarks.BENCHMARK_INPUT_COLS[benchmark]].tolist()), + str( + inputs[benchmarks.BENCHMARK_INPUT_COLS[benchmark]].tolist() + ), inputs_idx, idx, target_df[stat_col].fillna(0.0).mean(), @@ -1590,7 +2446,7 @@ def compute_target_name(name): if stat_config.log_y_axis: assert not stat_config.percent ymax_log = np.ceil(np.log10(ymax)) - ytick_values = np.arange(0, ymax_log+1, step=int(np.ceil(ymax_log / 6))) + ytick_values = np.arange(0, ymax_log + 1, step=int(np.ceil(ymax_log / 6))) ytick_values = np.power(10, ytick_values) print(stat_col, ymax_log, ytick_values) ax.set_yscale("log", base=10) @@ -1606,13 +2462,17 @@ def compute_target_name(name): ax.set_ylim(0, ymax) ytick_values = np.linspace(0, ymax, 6) - ytick_labels = [plot.human_format_thousands(v, round_to=0) for v in ytick_values] + ytick_labels = [ + plot.human_format_thousands(v, round_to=0) for v in ytick_values + ] ax.set_yticks(ytick_values, ytick_labels) ax.legend( - loc='upper left', + loc="upper left", bbox_to_anchor=(1, 1), - edgecolor="none", fancybox=False, shadow=False, + edgecolor="none", + fancybox=False, + shadow=False, ) filename = plot.PLOT_DIR / "validation/{}.{}.{}.pdf".format( profiler, bench_name, stat_col @@ -1647,13 +2507,25 @@ def compute_target_name(name): help="target", ) @click.option("--verbose", "verbose", type=bool, is_flag=True, help="verbose output") -@click.option("--strict", "strict", type=bool, default=True, help="fail on missing results") +@click.option( + "--strict", "strict", type=bool, default=True, help="fail on missing results" +) @click.option("--nvprof", "nvprof", type=bool, default=True, help="use nvprof") @click.option("--nsight", "nsight", type=bool, default=False, help="use nsight") @click.option("--out", "output_path", help="Output path for combined stats") def generate( - path, config_path, bench_name, input_idx, limit, quick, - target, verbose, strict, nvprof, nsight, output_path + path, + config_path, + bench_name, + input_idx, + limit, + quick, + target, + verbose, + strict, + nvprof, + nsight, + output_path, ): benches = [] @@ -1681,7 +2553,9 @@ def generate( if limit is not None: benches = benches[:limit] - print(f"processing {len(benches)} benchmark configurations ({len(targets)} targets)") + print( + f"processing {len(benches)} benchmark configurations ({len(targets)} targets)" + ) with open(config_path, "rb") as f: config = GPUConfig(yaml.safe_load(f)) @@ -1774,9 +2648,7 @@ def generate( if verbose: print(all_stats) - stats_output_path = ( - results_dir / f"combined.stats.{profiler}.{bench_name}.csv" - ) + stats_output_path = results_dir / f"combined.stats.{profiler}.{bench_name}.csv" if output_path is not None: stats_output_path = Path(output_path) diff --git a/plot/validation/nvprof.simple_matrixmul.cycles.pdf b/plot/validation/nvprof.simple_matrixmul.cycles.pdf index 2a4091cc..032104c0 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.cycles.pdf and b/plot/validation/nvprof.simple_matrixmul.cycles.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.exec_time_sec.pdf b/plot/validation/nvprof.simple_matrixmul.exec_time_sec.pdf index 2bc59486..7d4049c7 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.exec_time_sec.pdf and b/plot/validation/nvprof.simple_matrixmul.exec_time_sec.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.input_id.pdf b/plot/validation/nvprof.simple_matrixmul.input_id.pdf index 7366bec4..4e4180c3 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.input_id.pdf and b/plot/validation/nvprof.simple_matrixmul.input_id.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.instructions.pdf b/plot/validation/nvprof.simple_matrixmul.instructions.pdf index e1a7dab1..aa71fdf6 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.instructions.pdf and b/plot/validation/nvprof.simple_matrixmul.instructions.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.l1_accesses.pdf b/plot/validation/nvprof.simple_matrixmul.l1_accesses.pdf index 7f0090eb..13d5469a 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.l1_accesses.pdf and b/plot/validation/nvprof.simple_matrixmul.l1_accesses.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.l1_global_hit_rate.pdf b/plot/validation/nvprof.simple_matrixmul.l1_global_hit_rate.pdf index 4846703c..545abcbb 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.l1_global_hit_rate.pdf and b/plot/validation/nvprof.simple_matrixmul.l1_global_hit_rate.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.l1_hit_rate.pdf b/plot/validation/nvprof.simple_matrixmul.l1_hit_rate.pdf index dd80eb60..b3b66dc0 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.l1_hit_rate.pdf and b/plot/validation/nvprof.simple_matrixmul.l1_hit_rate.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.l1_local_hit_rate.pdf b/plot/validation/nvprof.simple_matrixmul.l1_local_hit_rate.pdf index e0b782de..7cf27f0c 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.l1_local_hit_rate.pdf and b/plot/validation/nvprof.simple_matrixmul.l1_local_hit_rate.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.l2_accesses.pdf b/plot/validation/nvprof.simple_matrixmul.l2_accesses.pdf index a075a901..35596a21 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.l2_accesses.pdf and b/plot/validation/nvprof.simple_matrixmul.l2_accesses.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.l2_read_hit_rate.pdf b/plot/validation/nvprof.simple_matrixmul.l2_read_hit_rate.pdf index 0fb63216..29c65873 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.l2_read_hit_rate.pdf and b/plot/validation/nvprof.simple_matrixmul.l2_read_hit_rate.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.l2_reads.pdf b/plot/validation/nvprof.simple_matrixmul.l2_reads.pdf index e3ad138d..056a9956 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.l2_reads.pdf and b/plot/validation/nvprof.simple_matrixmul.l2_reads.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.l2_write_hit_rate.pdf b/plot/validation/nvprof.simple_matrixmul.l2_write_hit_rate.pdf index d532ae8f..d6dad6c2 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.l2_write_hit_rate.pdf and b/plot/validation/nvprof.simple_matrixmul.l2_write_hit_rate.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.l2_writes.pdf b/plot/validation/nvprof.simple_matrixmul.l2_writes.pdf index 76880ab6..b63e1c86 100644 Binary files a/plot/validation/nvprof.simple_matrixmul.l2_writes.pdf and b/plot/validation/nvprof.simple_matrixmul.l2_writes.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.mean_blocks_per_sm.pdf b/plot/validation/nvprof.simple_matrixmul.mean_blocks_per_sm.pdf new file mode 100644 index 00000000..1e620d91 Binary files /dev/null and b/plot/validation/nvprof.simple_matrixmul.mean_blocks_per_sm.pdf differ diff --git a/plot/validation/nvprof.simple_matrixmul.num_blocks.pdf b/plot/validation/nvprof.simple_matrixmul.num_blocks.pdf new file mode 100644 index 00000000..5e8b13ab Binary files /dev/null and b/plot/validation/nvprof.simple_matrixmul.num_blocks.pdf differ diff --git a/test-apps/test-apps-materialized.yml b/test-apps/test-apps-materialized.yml index 686a8eec..8de2cab9 100755 --- a/test-apps/test-apps-materialized.yml +++ b/test-apps/test-apps-materialized.yml @@ -2,7 +2,7 @@ ## ## AUTO GENERATED! DO NOT EDIT ## -## this configuration was materialized from /home/roman/dev/box/test-apps/test-apps.yml on 15/11/2023 15:15:46 +## this configuration was materialized from /home/roman/dev/box/test-apps/test-apps.yml on 16/11/2023 14:34:52 ## config: @@ -993,6 +993,31 @@ benchmarks: target: Profile target_config: !Profile profile_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/profile + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-dtype-32-m-512-n-32-p-512 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 27 + common: + repetitions: 10 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Profile + target_config: !Profile + profile_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/profile matrixmul: - name: matrixmul benchmark_idx: 2 @@ -1078,6 +1103,27 @@ benchmarks: target: Profile target_config: !Profile profile_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/profile + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-dtype-32-rows-512 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 512 + args: + - '512' + - '32' + input_idx: 4 + common: + repetitions: 10 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Profile + target_config: !Profile + profile_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/profile transpose: - name: transpose benchmark_idx: 3 @@ -2326,6 +2372,35 @@ benchmarks: full_trace: true skip_kernel_prefixes: - gpucachesim_skip + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-dtype-32-m-512-n-32-p-512 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 27 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Trace + target_config: !Trace + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + save_json: false + full_trace: true + skip_kernel_prefixes: + - gpucachesim_skip matrixmul: - name: matrixmul benchmark_idx: 2 @@ -2427,6 +2502,31 @@ benchmarks: full_trace: true skip_kernel_prefixes: - gpucachesim_skip + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-dtype-32-rows-512 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 512 + args: + - '512' + - '32' + input_idx: 4 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Trace + target_config: !Trace + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + save_json: false + full_trace: true + skip_kernel_prefixes: + - gpucachesim_skip transpose: - name: transpose benchmark_idx: 3 @@ -3563,6 +3663,31 @@ benchmarks: target: AccelsimTrace target_config: !AccelsimTrace traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-dtype-32-m-512-n-32-p-512 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 27 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: AccelsimTrace + target_config: !AccelsimTrace + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace matrixmul: - name: matrixmul benchmark_idx: 2 @@ -3648,6 +3773,27 @@ benchmarks: target: AccelsimTrace target_config: !AccelsimTrace traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-dtype-32-rows-512 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 512 + args: + - '512' + - '32' + input_idx: 4 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: AccelsimTrace + target_config: !AccelsimTrace + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace transpose: - name: transpose benchmark_idx: 3 @@ -114303,25 +114449,28 @@ benchmarks: accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: false - matrixmul: - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 0 + input_idx: 2376 common: repetitions: 1 timeout: null @@ -114330,29 +114479,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 1 + input_idx: 2377 common: repetitions: 1 timeout: null @@ -114361,29 +114514,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 4 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 2 + input_idx: 2378 common: repetitions: 1 timeout: null @@ -114392,29 +114549,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 4 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 3 + input_idx: 2379 common: repetitions: 1 timeout: null @@ -114423,29 +114584,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 8 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 4 + input_idx: 2380 common: repetitions: 1 timeout: null @@ -114454,29 +114619,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 8 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 5 + input_idx: 2381 common: repetitions: 1 timeout: null @@ -114485,29 +114654,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-40-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 40 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 6 + input_idx: 2382 common: repetitions: 1 timeout: null @@ -114516,29 +114689,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-40-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-40-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 40 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 7 + input_idx: 2383 common: repetitions: 1 timeout: null @@ -114547,29 +114724,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-40-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 8 + input_idx: 2384 common: repetitions: 1 timeout: null @@ -114578,29 +114759,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 9 + input_idx: 2385 common: repetitions: 1 timeout: null @@ -114609,29 +114794,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 4 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 10 + input_idx: 2386 common: repetitions: 1 timeout: null @@ -114640,29 +114829,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 4 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 11 + input_idx: 2387 common: repetitions: 1 timeout: null @@ -114671,29 +114864,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 8 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 12 + input_idx: 2388 common: repetitions: 1 timeout: null @@ -114702,29 +114899,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 8 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 13 + input_idx: 2389 common: repetitions: 1 timeout: null @@ -114733,29 +114934,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 40 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 14 + input_idx: 2390 common: repetitions: 1 timeout: null @@ -114764,29 +114969,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 40 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 15 + input_idx: 2391 common: repetitions: 1 timeout: null @@ -114795,29 +115004,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 16 + input_idx: 2392 common: repetitions: 1 timeout: null @@ -114826,29 +115039,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 17 + input_idx: 2393 common: repetitions: 1 timeout: null @@ -114857,29 +115074,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 4 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 18 + input_idx: 2394 common: repetitions: 1 timeout: null @@ -114888,29 +115109,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 4 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 19 + input_idx: 2395 common: repetitions: 1 timeout: null @@ -114919,29 +115144,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 8 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 20 + input_idx: 2396 common: repetitions: 1 timeout: null @@ -114950,29 +115179,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 8 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 21 + input_idx: 2397 common: repetitions: 1 timeout: null @@ -114981,29 +115214,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 40 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 22 + input_idx: 2398 common: repetitions: 1 timeout: null @@ -115012,29 +115249,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 40 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 23 + input_idx: 2399 common: repetitions: 1 timeout: null @@ -115043,29 +115284,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 24 + input_idx: 2400 common: repetitions: 1 timeout: null @@ -115074,29 +115319,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 25 + input_idx: 2401 common: repetitions: 1 timeout: null @@ -115105,29 +115354,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 4 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 26 + input_idx: 2402 common: repetitions: 1 timeout: null @@ -115136,29 +115389,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 4 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 27 + input_idx: 2403 common: repetitions: 1 timeout: null @@ -115167,29 +115424,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 8 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 28 + input_idx: 2404 common: repetitions: 1 timeout: null @@ -115198,29 +115459,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 20 cores_per_cluster: 8 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 29 + input_idx: 2405 common: repetitions: 1 timeout: null @@ -115229,29 +115494,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 40 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 30 + input_idx: 2406 common: repetitions: 1 timeout: null @@ -115260,29 +115529,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 40 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 31 + input_idx: 2407 common: repetitions: 1 timeout: null @@ -115291,29 +115564,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 20 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 32 + input_idx: 2408 common: repetitions: 1 timeout: null @@ -115322,29 +115599,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 20 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 33 + input_idx: 2409 common: repetitions: 1 timeout: null @@ -115353,29 +115634,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 20 cores_per_cluster: 4 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 34 + input_idx: 2410 common: repetitions: 1 timeout: null @@ -115384,29 +115669,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 20 cores_per_cluster: 4 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 35 + input_idx: 2411 common: repetitions: 1 timeout: null @@ -115415,29 +115704,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 20 cores_per_cluster: 8 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 36 + input_idx: 2412 common: repetitions: 1 timeout: null @@ -115446,29 +115739,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 20 cores_per_cluster: 8 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 37 + input_idx: 2413 common: repetitions: 1 timeout: null @@ -115477,29 +115774,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 40 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 38 + input_idx: 2414 common: repetitions: 1 timeout: null @@ -115508,29 +115809,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 40 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 39 + input_idx: 2415 common: repetitions: 1 timeout: null @@ -115539,29 +115844,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 40 + input_idx: 2416 common: repetitions: 1 timeout: null @@ -115570,29 +115879,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 41 + input_idx: 2417 common: repetitions: 1 timeout: null @@ -115601,29 +115914,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 4 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 42 + input_idx: 2418 common: repetitions: 1 timeout: null @@ -115632,29 +115949,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 4 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 43 + input_idx: 2419 common: repetitions: 1 timeout: null @@ -115663,29 +115984,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 8 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 44 + input_idx: 2420 common: repetitions: 1 timeout: null @@ -115694,29 +116019,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 20 cores_per_cluster: 8 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 45 + input_idx: 2421 common: repetitions: 1 timeout: null @@ -115725,29 +116054,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 40 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 46 + input_idx: 2422 common: repetitions: 1 timeout: null @@ -115756,29 +116089,2922 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null - l2_prefill: true - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul values: - dtype: 32 - rows: 32 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 40 cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' + - '512' - '32' - input_idx: 47 + input_idx: 2423 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2424 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2425 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2426 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2427 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2428 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2429 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2430 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2431 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-40-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2432 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2433 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2434 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2435 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2436 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2437 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2438 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2439 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2440 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2441 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2442 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2443 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2444 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2445 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2446 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2447 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2448 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2449 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2450 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2451 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2452 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2453 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2454 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2455 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2456 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2457 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2458 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2459 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2460 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2461 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-20-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2462 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 2463 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic_interleave-n-32-num_clusters-40-p-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + parallel: null + l2_prefill: false + matrixmul: + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 0 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 1 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 2 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 3 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 4 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 5 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 6 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 7 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 8 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 9 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 10 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 11 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 12 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 13 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 14 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 15 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 16 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 17 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 18 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 19 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 20 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 21 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 22 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 23 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 24 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 25 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 26 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 27 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 28 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 29 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 30 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 31 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 32 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 33 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 34 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 35 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 36 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 37 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 38 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 39 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 40 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 41 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 42 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 43 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 44 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 45 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 46 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 47 common: repetitions: 1 timeout: null @@ -115794,22 +119020,2595 @@ benchmarks: l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 48 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 49 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 50 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 51 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 52 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 53 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 54 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 55 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 56 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 57 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 58 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 59 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 60 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 61 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 62 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 63 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 64 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 65 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 66 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 67 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 68 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 69 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 70 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 4 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 71 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 72 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 73 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 74 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 75 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 76 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 77 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 78 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 79 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 80 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 81 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '32' + - '32' + input_idx: 82 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '32' + - '32' + input_idx: 83 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '32' + - '32' + input_idx: 84 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '32' + - '32' + input_idx: 85 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + input_idx: 86 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 32 + mode: nondeterministic_interleave + threads: 8 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '32' + - '32' + input_idx: 87 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 88 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 89 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '64' + - '32' + input_idx: 90 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '64' + - '32' + input_idx: 91 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '64' + - '32' + input_idx: 92 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '64' + - '32' + input_idx: 93 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 94 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 95 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 96 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 97 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '64' + - '32' + input_idx: 98 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '64' + - '32' + input_idx: 99 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '64' + - '32' + input_idx: 100 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '64' + - '32' + input_idx: 101 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 102 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 103 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 104 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 105 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '64' + - '32' + input_idx: 106 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '64' + - '32' + input_idx: 107 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '64' + - '32' + input_idx: 108 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '64' + - '32' + input_idx: 109 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 110 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 111 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 112 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 113 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '64' + - '32' + input_idx: 114 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '64' + - '32' + input_idx: 115 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '64' + - '32' + input_idx: 116 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '64' + - '32' + input_idx: 117 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 118 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 119 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 120 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 121 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: false + args: + - '64' + - '32' + input_idx: 122 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '64' + - '32' + input_idx: 123 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '64' + - '32' + input_idx: 124 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '64' + - '32' + input_idx: 125 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 126 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 127 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 128 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 20 cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 129 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 memory_only: false args: + - '64' - '32' + input_idx: 130 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 4 + memory_only: true + args: + - '64' - '32' - input_idx: 48 + input_idx: 131 common: repetitions: 1 timeout: null @@ -115818,29 +121617,153 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 + num_clusters: 20 + cores_per_cluster: 8 + memory_only: false + args: + - '64' + - '32' + input_idx: 132 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 8 + run_ahead: 5 num_clusters: 20 + cores_per_cluster: 8 + memory_only: true + args: + - '64' + - '32' + input_idx: 133 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 40 + cores_per_cluster: 1 + memory_only: false + args: + - '64' + - '32' + input_idx: 134 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 40 cores_per_cluster: 1 memory_only: true args: + - '64' - '32' + input_idx: 135 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: false + args: + - '64' - '32' - input_idx: 49 + input_idx: 136 common: repetitions: 1 timeout: null @@ -115849,19 +121772,50 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 20 + cores_per_cluster: 1 + memory_only: true + args: + - '64' + - '32' + input_idx: 137 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + parallel: null + l2_prefill: true + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 64 mode: nondeterministic threads: 8 run_ahead: 10 @@ -115869,9 +121823,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 50 + input_idx: 138 common: repetitions: 1 timeout: null @@ -115880,19 +121834,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic threads: 8 run_ahead: 10 @@ -115900,9 +121854,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 51 + input_idx: 139 common: repetitions: 1 timeout: null @@ -115911,19 +121865,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic threads: 8 run_ahead: 10 @@ -115931,9 +121885,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 52 + input_idx: 140 common: repetitions: 1 timeout: null @@ -115942,19 +121896,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic threads: 8 run_ahead: 10 @@ -115962,9 +121916,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 53 + input_idx: 141 common: repetitions: 1 timeout: null @@ -115973,19 +121927,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic threads: 8 run_ahead: 10 @@ -115993,9 +121947,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 54 + input_idx: 142 common: repetitions: 1 timeout: null @@ -116004,19 +121958,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic threads: 8 run_ahead: 10 @@ -116024,9 +121978,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 55 + input_idx: 143 common: repetitions: 1 timeout: null @@ -116035,19 +121989,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -116055,9 +122009,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 56 + input_idx: 144 common: repetitions: 1 timeout: null @@ -116066,19 +122020,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -116086,9 +122040,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 57 + input_idx: 145 common: repetitions: 1 timeout: null @@ -116097,19 +122051,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -116117,9 +122071,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 58 + input_idx: 146 common: repetitions: 1 timeout: null @@ -116128,19 +122082,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -116148,9 +122102,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 59 + input_idx: 147 common: repetitions: 1 timeout: null @@ -116159,19 +122113,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -116179,9 +122133,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 60 + input_idx: 148 common: repetitions: 1 timeout: null @@ -116190,19 +122144,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -116210,9 +122164,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 61 + input_idx: 149 common: repetitions: 1 timeout: null @@ -116221,19 +122175,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -116241,9 +122195,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 62 + input_idx: 150 common: repetitions: 1 timeout: null @@ -116252,19 +122206,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -116272,9 +122226,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 63 + input_idx: 151 common: repetitions: 1 timeout: null @@ -116283,19 +122237,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -116303,9 +122257,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 64 + input_idx: 152 common: repetitions: 1 timeout: null @@ -116314,19 +122268,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -116334,9 +122288,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 65 + input_idx: 153 common: repetitions: 1 timeout: null @@ -116345,19 +122299,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -116365,9 +122319,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 66 + input_idx: 154 common: repetitions: 1 timeout: null @@ -116376,19 +122330,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -116396,9 +122350,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 67 + input_idx: 155 common: repetitions: 1 timeout: null @@ -116407,19 +122361,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -116427,9 +122381,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 68 + input_idx: 156 common: repetitions: 1 timeout: null @@ -116438,19 +122392,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -116458,9 +122412,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 69 + input_idx: 157 common: repetitions: 1 timeout: null @@ -116469,19 +122423,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -116489,9 +122443,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 70 + input_idx: 158 common: repetitions: 1 timeout: null @@ -116500,19 +122454,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -116520,9 +122474,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 71 + input_idx: 159 common: repetitions: 1 timeout: null @@ -116531,19 +122485,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -116551,9 +122505,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 72 + input_idx: 160 common: repetitions: 1 timeout: null @@ -116562,19 +122516,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -116582,9 +122536,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 73 + input_idx: 161 common: repetitions: 1 timeout: null @@ -116593,19 +122547,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -116613,9 +122567,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 74 + input_idx: 162 common: repetitions: 1 timeout: null @@ -116624,19 +122578,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -116644,9 +122598,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 75 + input_idx: 163 common: repetitions: 1 timeout: null @@ -116655,19 +122609,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -116675,9 +122629,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 76 + input_idx: 164 common: repetitions: 1 timeout: null @@ -116686,19 +122640,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -116706,9 +122660,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 77 + input_idx: 165 common: repetitions: 1 timeout: null @@ -116717,19 +122671,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -116737,9 +122691,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 78 + input_idx: 166 common: repetitions: 1 timeout: null @@ -116748,19 +122702,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -116768,9 +122722,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 79 + input_idx: 167 common: repetitions: 1 timeout: null @@ -116779,19 +122733,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -116799,9 +122753,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 80 + input_idx: 168 common: repetitions: 1 timeout: null @@ -116810,19 +122764,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -116830,9 +122784,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 81 + input_idx: 169 common: repetitions: 1 timeout: null @@ -116841,19 +122795,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -116861,9 +122815,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 82 + input_idx: 170 common: repetitions: 1 timeout: null @@ -116872,19 +122826,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -116892,9 +122846,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 83 + input_idx: 171 common: repetitions: 1 timeout: null @@ -116903,19 +122857,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -116923,9 +122877,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 84 + input_idx: 172 common: repetitions: 1 timeout: null @@ -116934,19 +122888,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -116954,9 +122908,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 85 + input_idx: 173 common: repetitions: 1 timeout: null @@ -116965,19 +122919,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -116985,9 +122939,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: + - '64' - '32' - - '32' - input_idx: 86 + input_idx: 174 common: repetitions: 1 timeout: null @@ -116996,19 +122950,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 32 + rows: 64 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -117016,9 +122970,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: + - '64' - '32' - - '32' - input_idx: 87 + input_idx: 175 common: repetitions: 1 timeout: null @@ -117027,19 +122981,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-32-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: serial threads: 4 run_ahead: 5 @@ -117047,9 +123001,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 88 + input_idx: 176 common: repetitions: 1 timeout: null @@ -117058,19 +123012,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: serial threads: 4 run_ahead: 5 @@ -117078,9 +123032,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 89 + input_idx: 177 common: repetitions: 1 timeout: null @@ -117089,19 +123043,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: serial threads: 4 run_ahead: 5 @@ -117109,9 +123063,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 90 + input_idx: 178 common: repetitions: 1 timeout: null @@ -117120,19 +123074,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: serial threads: 4 run_ahead: 5 @@ -117140,9 +123094,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 91 + input_idx: 179 common: repetitions: 1 timeout: null @@ -117151,19 +123105,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: serial threads: 4 run_ahead: 5 @@ -117171,9 +123125,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 92 + input_idx: 180 common: repetitions: 1 timeout: null @@ -117182,19 +123136,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: serial threads: 4 run_ahead: 5 @@ -117202,9 +123156,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 93 + input_idx: 181 common: repetitions: 1 timeout: null @@ -117213,19 +123167,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: serial threads: 4 run_ahead: 5 @@ -117233,9 +123187,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 94 + input_idx: 182 common: repetitions: 1 timeout: null @@ -117244,19 +123198,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: serial threads: 4 run_ahead: 5 @@ -117264,9 +123218,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 95 + input_idx: 183 common: repetitions: 1 timeout: null @@ -117275,19 +123229,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 4 run_ahead: 5 @@ -117295,9 +123249,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 96 + input_idx: 184 common: repetitions: 1 timeout: null @@ -117306,19 +123260,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 4 run_ahead: 5 @@ -117326,9 +123280,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 97 + input_idx: 185 common: repetitions: 1 timeout: null @@ -117337,19 +123291,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 4 run_ahead: 5 @@ -117357,9 +123311,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 98 + input_idx: 186 common: repetitions: 1 timeout: null @@ -117368,19 +123322,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 4 run_ahead: 5 @@ -117388,9 +123342,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 99 + input_idx: 187 common: repetitions: 1 timeout: null @@ -117399,19 +123353,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 4 run_ahead: 5 @@ -117419,9 +123373,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 100 + input_idx: 188 common: repetitions: 1 timeout: null @@ -117430,19 +123384,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 4 run_ahead: 5 @@ -117450,9 +123404,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 101 + input_idx: 189 common: repetitions: 1 timeout: null @@ -117461,19 +123415,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 4 run_ahead: 5 @@ -117481,9 +123435,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 102 + input_idx: 190 common: repetitions: 1 timeout: null @@ -117492,19 +123446,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 4 run_ahead: 5 @@ -117512,9 +123466,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 103 + input_idx: 191 common: repetitions: 1 timeout: null @@ -117523,19 +123477,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 8 run_ahead: 5 @@ -117543,9 +123497,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 104 + input_idx: 192 common: repetitions: 1 timeout: null @@ -117554,19 +123508,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 8 run_ahead: 5 @@ -117574,9 +123528,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 105 + input_idx: 193 common: repetitions: 1 timeout: null @@ -117585,19 +123539,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 8 run_ahead: 5 @@ -117605,9 +123559,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 106 + input_idx: 194 common: repetitions: 1 timeout: null @@ -117616,19 +123570,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 8 run_ahead: 5 @@ -117636,9 +123590,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 107 + input_idx: 195 common: repetitions: 1 timeout: null @@ -117647,19 +123601,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 8 run_ahead: 5 @@ -117667,9 +123621,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 108 + input_idx: 196 common: repetitions: 1 timeout: null @@ -117678,19 +123632,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 8 run_ahead: 5 @@ -117698,9 +123652,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 109 + input_idx: 197 common: repetitions: 1 timeout: null @@ -117709,19 +123663,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 8 run_ahead: 5 @@ -117729,9 +123683,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 110 + input_idx: 198 common: repetitions: 1 timeout: null @@ -117740,19 +123694,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: deterministic threads: 8 run_ahead: 5 @@ -117760,9 +123714,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 111 + input_idx: 199 common: repetitions: 1 timeout: null @@ -117771,19 +123725,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 5 @@ -117791,9 +123745,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 112 + input_idx: 200 common: repetitions: 1 timeout: null @@ -117802,19 +123756,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 5 @@ -117822,9 +123776,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 113 + input_idx: 201 common: repetitions: 1 timeout: null @@ -117833,19 +123787,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 5 @@ -117853,9 +123807,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 114 + input_idx: 202 common: repetitions: 1 timeout: null @@ -117864,19 +123818,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 5 @@ -117884,9 +123838,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 115 + input_idx: 203 common: repetitions: 1 timeout: null @@ -117895,19 +123849,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 5 @@ -117915,9 +123869,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 116 + input_idx: 204 common: repetitions: 1 timeout: null @@ -117926,19 +123880,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 5 @@ -117946,9 +123900,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 117 + input_idx: 205 common: repetitions: 1 timeout: null @@ -117957,19 +123911,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 5 @@ -117977,9 +123931,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 118 + input_idx: 206 common: repetitions: 1 timeout: null @@ -117988,19 +123942,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 5 @@ -118008,9 +123962,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 119 + input_idx: 207 common: repetitions: 1 timeout: null @@ -118019,19 +123973,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 10 @@ -118039,9 +123993,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 120 + input_idx: 208 common: repetitions: 1 timeout: null @@ -118050,19 +124004,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 10 @@ -118070,9 +124024,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 121 + input_idx: 209 common: repetitions: 1 timeout: null @@ -118081,19 +124035,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 10 @@ -118101,9 +124055,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 122 + input_idx: 210 common: repetitions: 1 timeout: null @@ -118112,19 +124066,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 10 @@ -118132,9 +124086,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 123 + input_idx: 211 common: repetitions: 1 timeout: null @@ -118143,19 +124097,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 10 @@ -118163,9 +124117,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 124 + input_idx: 212 common: repetitions: 1 timeout: null @@ -118174,19 +124128,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 10 @@ -118194,9 +124148,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 125 + input_idx: 213 common: repetitions: 1 timeout: null @@ -118205,19 +124159,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 10 @@ -118225,9 +124179,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 126 + input_idx: 214 common: repetitions: 1 timeout: null @@ -118236,19 +124190,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 4 run_ahead: 10 @@ -118256,9 +124210,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 127 + input_idx: 215 common: repetitions: 1 timeout: null @@ -118267,19 +124221,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 5 @@ -118287,9 +124241,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 128 + input_idx: 216 common: repetitions: 1 timeout: null @@ -118298,19 +124252,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 5 @@ -118318,9 +124272,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 129 + input_idx: 217 common: repetitions: 1 timeout: null @@ -118329,19 +124283,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 5 @@ -118349,9 +124303,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 130 + input_idx: 218 common: repetitions: 1 timeout: null @@ -118360,19 +124314,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 5 @@ -118380,9 +124334,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 131 + input_idx: 219 common: repetitions: 1 timeout: null @@ -118391,19 +124345,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 5 @@ -118411,9 +124365,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 132 + input_idx: 220 common: repetitions: 1 timeout: null @@ -118422,19 +124376,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 5 @@ -118442,9 +124396,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 133 + input_idx: 221 common: repetitions: 1 timeout: null @@ -118453,19 +124407,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 5 @@ -118473,9 +124427,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 134 + input_idx: 222 common: repetitions: 1 timeout: null @@ -118484,19 +124438,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 5 @@ -118504,9 +124458,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 135 + input_idx: 223 common: repetitions: 1 timeout: null @@ -118515,19 +124469,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 10 @@ -118535,9 +124489,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 136 + input_idx: 224 common: repetitions: 1 timeout: null @@ -118546,19 +124500,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 10 @@ -118566,9 +124520,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 137 + input_idx: 225 common: repetitions: 1 timeout: null @@ -118577,19 +124531,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 10 @@ -118597,9 +124551,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 138 + input_idx: 226 common: repetitions: 1 timeout: null @@ -118608,19 +124562,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 10 @@ -118628,9 +124582,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 139 + input_idx: 227 common: repetitions: 1 timeout: null @@ -118639,19 +124593,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 10 @@ -118659,9 +124613,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 140 + input_idx: 228 common: repetitions: 1 timeout: null @@ -118670,19 +124624,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 10 @@ -118690,9 +124644,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 141 + input_idx: 229 common: repetitions: 1 timeout: null @@ -118701,19 +124655,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 10 @@ -118721,9 +124675,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 142 + input_idx: 230 common: repetitions: 1 timeout: null @@ -118732,19 +124686,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic threads: 8 run_ahead: 10 @@ -118752,9 +124706,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 143 + input_idx: 231 common: repetitions: 1 timeout: null @@ -118763,19 +124717,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -118783,9 +124737,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 144 + input_idx: 232 common: repetitions: 1 timeout: null @@ -118794,19 +124748,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -118814,9 +124768,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 145 + input_idx: 233 common: repetitions: 1 timeout: null @@ -118825,19 +124779,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -118845,9 +124799,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 146 + input_idx: 234 common: repetitions: 1 timeout: null @@ -118856,19 +124810,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -118876,9 +124830,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 147 + input_idx: 235 common: repetitions: 1 timeout: null @@ -118887,19 +124841,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -118907,9 +124861,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 148 + input_idx: 236 common: repetitions: 1 timeout: null @@ -118918,19 +124872,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -118938,9 +124892,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 149 + input_idx: 237 common: repetitions: 1 timeout: null @@ -118949,19 +124903,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -118969,9 +124923,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 150 + input_idx: 238 common: repetitions: 1 timeout: null @@ -118980,19 +124934,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -119000,9 +124954,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 151 + input_idx: 239 common: repetitions: 1 timeout: null @@ -119011,19 +124965,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -119031,9 +124985,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 152 + input_idx: 240 common: repetitions: 1 timeout: null @@ -119042,19 +124996,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -119062,9 +125016,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 153 + input_idx: 241 common: repetitions: 1 timeout: null @@ -119073,19 +125027,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -119093,9 +125047,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 154 + input_idx: 242 common: repetitions: 1 timeout: null @@ -119104,19 +125058,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -119124,9 +125078,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 155 + input_idx: 243 common: repetitions: 1 timeout: null @@ -119135,19 +125089,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -119155,9 +125109,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 156 + input_idx: 244 common: repetitions: 1 timeout: null @@ -119166,19 +125120,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -119186,9 +125140,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 157 + input_idx: 245 common: repetitions: 1 timeout: null @@ -119197,19 +125151,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -119217,9 +125171,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 158 + input_idx: 246 common: repetitions: 1 timeout: null @@ -119228,19 +125182,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -119248,9 +125202,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 159 + input_idx: 247 common: repetitions: 1 timeout: null @@ -119259,19 +125213,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -119279,9 +125233,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 160 + input_idx: 248 common: repetitions: 1 timeout: null @@ -119290,19 +125244,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -119310,9 +125264,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 161 + input_idx: 249 common: repetitions: 1 timeout: null @@ -119321,19 +125275,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -119341,9 +125295,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 162 + input_idx: 250 common: repetitions: 1 timeout: null @@ -119352,19 +125306,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -119372,9 +125326,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 163 + input_idx: 251 common: repetitions: 1 timeout: null @@ -119383,19 +125337,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -119403,9 +125357,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 164 + input_idx: 252 common: repetitions: 1 timeout: null @@ -119414,19 +125368,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -119434,9 +125388,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 165 + input_idx: 253 common: repetitions: 1 timeout: null @@ -119445,19 +125399,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -119465,9 +125419,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 166 + input_idx: 254 common: repetitions: 1 timeout: null @@ -119476,19 +125430,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -119496,9 +125450,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 167 + input_idx: 255 common: repetitions: 1 timeout: null @@ -119507,19 +125461,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -119527,9 +125481,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 168 + input_idx: 256 common: repetitions: 1 timeout: null @@ -119538,19 +125492,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -119558,9 +125512,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 169 + input_idx: 257 common: repetitions: 1 timeout: null @@ -119569,19 +125523,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -119589,9 +125543,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 170 + input_idx: 258 common: repetitions: 1 timeout: null @@ -119600,19 +125554,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -119620,9 +125574,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 171 + input_idx: 259 common: repetitions: 1 timeout: null @@ -119631,19 +125585,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -119651,9 +125605,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 172 + input_idx: 260 common: repetitions: 1 timeout: null @@ -119662,19 +125616,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -119682,9 +125636,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 173 + input_idx: 261 common: repetitions: 1 timeout: null @@ -119693,19 +125647,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -119713,9 +125667,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '64' + - '128' - '32' - input_idx: 174 + input_idx: 262 common: repetitions: 1 timeout: null @@ -119724,19 +125678,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 64 + rows: 128 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -119744,9 +125698,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '64' + - '128' - '32' - input_idx: 175 + input_idx: 263 common: repetitions: 1 timeout: null @@ -119755,19 +125709,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-64-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: serial threads: 4 run_ahead: 5 @@ -119775,9 +125729,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 176 + input_idx: 264 common: repetitions: 1 timeout: null @@ -119786,19 +125740,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: serial threads: 4 run_ahead: 5 @@ -119806,9 +125760,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 177 + input_idx: 265 common: repetitions: 1 timeout: null @@ -119817,19 +125771,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: serial threads: 4 run_ahead: 5 @@ -119837,9 +125791,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 178 + input_idx: 266 common: repetitions: 1 timeout: null @@ -119848,19 +125802,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: serial threads: 4 run_ahead: 5 @@ -119868,9 +125822,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 179 + input_idx: 267 common: repetitions: 1 timeout: null @@ -119879,19 +125833,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: serial threads: 4 run_ahead: 5 @@ -119899,9 +125853,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 180 + input_idx: 268 common: repetitions: 1 timeout: null @@ -119910,19 +125864,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: serial threads: 4 run_ahead: 5 @@ -119930,9 +125884,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 181 + input_idx: 269 common: repetitions: 1 timeout: null @@ -119941,19 +125895,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: serial threads: 4 run_ahead: 5 @@ -119961,9 +125915,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 182 + input_idx: 270 common: repetitions: 1 timeout: null @@ -119972,19 +125926,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: serial threads: 4 run_ahead: 5 @@ -119992,9 +125946,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 183 + input_idx: 271 common: repetitions: 1 timeout: null @@ -120003,19 +125957,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 4 run_ahead: 5 @@ -120023,9 +125977,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 184 + input_idx: 272 common: repetitions: 1 timeout: null @@ -120034,19 +125988,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 4 run_ahead: 5 @@ -120054,9 +126008,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 185 + input_idx: 273 common: repetitions: 1 timeout: null @@ -120065,19 +126019,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 4 run_ahead: 5 @@ -120085,9 +126039,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 186 + input_idx: 274 common: repetitions: 1 timeout: null @@ -120096,19 +126050,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 4 run_ahead: 5 @@ -120116,9 +126070,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 187 + input_idx: 275 common: repetitions: 1 timeout: null @@ -120127,19 +126081,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 4 run_ahead: 5 @@ -120147,9 +126101,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 188 + input_idx: 276 common: repetitions: 1 timeout: null @@ -120158,19 +126112,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 4 run_ahead: 5 @@ -120178,9 +126132,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 189 + input_idx: 277 common: repetitions: 1 timeout: null @@ -120189,19 +126143,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 4 run_ahead: 5 @@ -120209,9 +126163,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 190 + input_idx: 278 common: repetitions: 1 timeout: null @@ -120220,19 +126174,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 4 run_ahead: 5 @@ -120240,9 +126194,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 191 + input_idx: 279 common: repetitions: 1 timeout: null @@ -120251,19 +126205,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 8 run_ahead: 5 @@ -120271,9 +126225,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 192 + input_idx: 280 common: repetitions: 1 timeout: null @@ -120282,19 +126236,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 8 run_ahead: 5 @@ -120302,9 +126256,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 193 + input_idx: 281 common: repetitions: 1 timeout: null @@ -120313,19 +126267,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 8 run_ahead: 5 @@ -120333,9 +126287,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 194 + input_idx: 282 common: repetitions: 1 timeout: null @@ -120344,19 +126298,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 8 run_ahead: 5 @@ -120364,9 +126318,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 195 + input_idx: 283 common: repetitions: 1 timeout: null @@ -120375,19 +126329,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 8 run_ahead: 5 @@ -120395,9 +126349,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 196 + input_idx: 284 common: repetitions: 1 timeout: null @@ -120406,19 +126360,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 8 run_ahead: 5 @@ -120426,9 +126380,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 197 + input_idx: 285 common: repetitions: 1 timeout: null @@ -120437,19 +126391,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 8 run_ahead: 5 @@ -120457,9 +126411,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 198 + input_idx: 286 common: repetitions: 1 timeout: null @@ -120468,19 +126422,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: deterministic threads: 8 run_ahead: 5 @@ -120488,9 +126442,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 199 + input_idx: 287 common: repetitions: 1 timeout: null @@ -120499,19 +126453,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 @@ -120519,9 +126473,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 200 + input_idx: 288 common: repetitions: 1 timeout: null @@ -120530,19 +126484,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 @@ -120550,9 +126504,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 201 + input_idx: 289 common: repetitions: 1 timeout: null @@ -120561,19 +126515,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 @@ -120581,9 +126535,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 202 + input_idx: 290 common: repetitions: 1 timeout: null @@ -120592,19 +126546,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 @@ -120612,9 +126566,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 203 + input_idx: 291 common: repetitions: 1 timeout: null @@ -120623,19 +126577,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 @@ -120643,9 +126597,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 204 + input_idx: 292 common: repetitions: 1 timeout: null @@ -120654,19 +126608,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 @@ -120674,9 +126628,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 205 + input_idx: 293 common: repetitions: 1 timeout: null @@ -120685,19 +126639,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 @@ -120705,9 +126659,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 206 + input_idx: 294 common: repetitions: 1 timeout: null @@ -120716,19 +126670,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 @@ -120736,9 +126690,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 207 + input_idx: 295 common: repetitions: 1 timeout: null @@ -120747,19 +126701,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 10 @@ -120767,9 +126721,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 208 + input_idx: 296 common: repetitions: 1 timeout: null @@ -120778,19 +126732,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 10 @@ -120798,9 +126752,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 209 + input_idx: 297 common: repetitions: 1 timeout: null @@ -120809,19 +126763,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 10 @@ -120829,9 +126783,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 210 + input_idx: 298 common: repetitions: 1 timeout: null @@ -120840,19 +126794,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 10 @@ -120860,9 +126814,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 211 + input_idx: 299 common: repetitions: 1 timeout: null @@ -120871,19 +126825,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 10 @@ -120891,9 +126845,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 212 + input_idx: 300 common: repetitions: 1 timeout: null @@ -120902,19 +126856,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 10 @@ -120922,9 +126876,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 213 + input_idx: 301 common: repetitions: 1 timeout: null @@ -120933,19 +126887,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 10 @@ -120953,9 +126907,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 214 + input_idx: 302 common: repetitions: 1 timeout: null @@ -120964,19 +126918,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 10 @@ -120984,9 +126938,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 215 + input_idx: 303 common: repetitions: 1 timeout: null @@ -120995,19 +126949,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 5 @@ -121015,9 +126969,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 216 + input_idx: 304 common: repetitions: 1 timeout: null @@ -121026,19 +126980,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 5 @@ -121046,9 +127000,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 217 + input_idx: 305 common: repetitions: 1 timeout: null @@ -121057,19 +127011,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 5 @@ -121077,9 +127031,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 218 + input_idx: 306 common: repetitions: 1 timeout: null @@ -121088,19 +127042,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 5 @@ -121108,9 +127062,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 219 + input_idx: 307 common: repetitions: 1 timeout: null @@ -121119,19 +127073,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 5 @@ -121139,9 +127093,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 220 + input_idx: 308 common: repetitions: 1 timeout: null @@ -121150,19 +127104,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 5 @@ -121170,9 +127124,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 221 + input_idx: 309 common: repetitions: 1 timeout: null @@ -121181,19 +127135,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 5 @@ -121201,9 +127155,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 222 + input_idx: 310 common: repetitions: 1 timeout: null @@ -121212,19 +127166,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 5 @@ -121232,9 +127186,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 223 + input_idx: 311 common: repetitions: 1 timeout: null @@ -121243,19 +127197,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 10 @@ -121263,9 +127217,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 224 + input_idx: 312 common: repetitions: 1 timeout: null @@ -121274,19 +127228,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 10 @@ -121294,9 +127248,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 225 + input_idx: 313 common: repetitions: 1 timeout: null @@ -121305,19 +127259,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 10 @@ -121325,9 +127279,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 226 + input_idx: 314 common: repetitions: 1 timeout: null @@ -121336,19 +127290,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 10 @@ -121356,9 +127310,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 227 + input_idx: 315 common: repetitions: 1 timeout: null @@ -121367,19 +127321,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 10 @@ -121387,9 +127341,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 228 + input_idx: 316 common: repetitions: 1 timeout: null @@ -121398,19 +127352,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 10 @@ -121418,9 +127372,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 229 + input_idx: 317 common: repetitions: 1 timeout: null @@ -121429,19 +127383,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 10 @@ -121449,9 +127403,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 230 + input_idx: 318 common: repetitions: 1 timeout: null @@ -121460,19 +127414,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic threads: 8 run_ahead: 10 @@ -121480,9 +127434,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 231 + input_idx: 319 common: repetitions: 1 timeout: null @@ -121491,19 +127445,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -121511,9 +127465,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 232 + input_idx: 320 common: repetitions: 1 timeout: null @@ -121522,19 +127476,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -121542,9 +127496,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 233 + input_idx: 321 common: repetitions: 1 timeout: null @@ -121553,19 +127507,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -121573,9 +127527,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 234 + input_idx: 322 common: repetitions: 1 timeout: null @@ -121584,19 +127538,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -121604,9 +127558,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 235 + input_idx: 323 common: repetitions: 1 timeout: null @@ -121615,19 +127569,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -121635,9 +127589,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 236 + input_idx: 324 common: repetitions: 1 timeout: null @@ -121646,19 +127600,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -121666,9 +127620,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 237 + input_idx: 325 common: repetitions: 1 timeout: null @@ -121677,19 +127631,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -121697,9 +127651,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 238 + input_idx: 326 common: repetitions: 1 timeout: null @@ -121708,19 +127662,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -121728,9 +127682,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 239 + input_idx: 327 common: repetitions: 1 timeout: null @@ -121739,19 +127693,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -121759,9 +127713,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 240 + input_idx: 328 common: repetitions: 1 timeout: null @@ -121770,19 +127724,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -121790,9 +127744,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 241 + input_idx: 329 common: repetitions: 1 timeout: null @@ -121801,19 +127755,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -121821,9 +127775,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 242 + input_idx: 330 common: repetitions: 1 timeout: null @@ -121832,19 +127786,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -121852,9 +127806,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 243 + input_idx: 331 common: repetitions: 1 timeout: null @@ -121863,19 +127817,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -121883,9 +127837,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 244 + input_idx: 332 common: repetitions: 1 timeout: null @@ -121894,19 +127848,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -121914,9 +127868,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 245 + input_idx: 333 common: repetitions: 1 timeout: null @@ -121925,19 +127879,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -121945,9 +127899,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 246 + input_idx: 334 common: repetitions: 1 timeout: null @@ -121956,19 +127910,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -121976,9 +127930,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 247 + input_idx: 335 common: repetitions: 1 timeout: null @@ -121987,19 +127941,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -122007,9 +127961,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 248 + input_idx: 336 common: repetitions: 1 timeout: null @@ -122018,19 +127972,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -122038,9 +127992,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 249 + input_idx: 337 common: repetitions: 1 timeout: null @@ -122049,19 +128003,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -122069,9 +128023,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 250 + input_idx: 338 common: repetitions: 1 timeout: null @@ -122080,19 +128034,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -122100,9 +128054,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 251 + input_idx: 339 common: repetitions: 1 timeout: null @@ -122111,19 +128065,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -122131,9 +128085,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 252 + input_idx: 340 common: repetitions: 1 timeout: null @@ -122142,19 +128096,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -122162,9 +128116,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 253 + input_idx: 341 common: repetitions: 1 timeout: null @@ -122173,19 +128127,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -122193,9 +128147,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 254 + input_idx: 342 common: repetitions: 1 timeout: null @@ -122204,19 +128158,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -122224,9 +128178,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 255 + input_idx: 343 common: repetitions: 1 timeout: null @@ -122235,19 +128189,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -122255,9 +128209,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 256 + input_idx: 344 common: repetitions: 1 timeout: null @@ -122266,19 +128220,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -122286,9 +128240,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 257 + input_idx: 345 common: repetitions: 1 timeout: null @@ -122297,19 +128251,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -122317,9 +128271,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 258 + input_idx: 346 common: repetitions: 1 timeout: null @@ -122328,19 +128282,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -122348,9 +128302,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 259 + input_idx: 347 common: repetitions: 1 timeout: null @@ -122359,19 +128313,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -122379,9 +128333,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 260 + input_idx: 348 common: repetitions: 1 timeout: null @@ -122390,19 +128344,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -122410,9 +128364,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 261 + input_idx: 349 common: repetitions: 1 timeout: null @@ -122421,19 +128375,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -122441,9 +128395,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '128' + - '256' - '32' - input_idx: 262 + input_idx: 350 common: repetitions: 1 timeout: null @@ -122452,19 +128406,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 128 + rows: 256 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -122472,9 +128426,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '128' + - '256' - '32' - input_idx: 263 + input_idx: 351 common: repetitions: 1 timeout: null @@ -122483,19 +128437,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-128-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: serial threads: 4 run_ahead: 5 @@ -122503,9 +128457,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 264 + input_idx: 352 common: repetitions: 1 timeout: null @@ -122514,19 +128468,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: serial threads: 4 run_ahead: 5 @@ -122534,9 +128488,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 265 + input_idx: 353 common: repetitions: 1 timeout: null @@ -122545,19 +128499,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: serial threads: 4 run_ahead: 5 @@ -122565,9 +128519,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 266 + input_idx: 354 common: repetitions: 1 timeout: null @@ -122576,19 +128530,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: serial threads: 4 run_ahead: 5 @@ -122596,9 +128550,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 267 + input_idx: 355 common: repetitions: 1 timeout: null @@ -122607,19 +128561,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: serial threads: 4 run_ahead: 5 @@ -122627,9 +128581,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 268 + input_idx: 356 common: repetitions: 1 timeout: null @@ -122638,19 +128592,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: serial threads: 4 run_ahead: 5 @@ -122658,9 +128612,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 269 + input_idx: 357 common: repetitions: 1 timeout: null @@ -122669,19 +128623,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: serial threads: 4 run_ahead: 5 @@ -122689,9 +128643,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 270 + input_idx: 358 common: repetitions: 1 timeout: null @@ -122700,19 +128654,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-40-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: serial threads: 4 run_ahead: 5 @@ -122720,9 +128674,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 271 + input_idx: 359 common: repetitions: 1 timeout: null @@ -122731,19 +128685,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-40-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 4 run_ahead: 5 @@ -122751,9 +128705,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 272 + input_idx: 360 common: repetitions: 1 timeout: null @@ -122762,19 +128716,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 4 run_ahead: 5 @@ -122782,9 +128736,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 273 + input_idx: 361 common: repetitions: 1 timeout: null @@ -122793,19 +128747,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 4 run_ahead: 5 @@ -122813,9 +128767,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 274 + input_idx: 362 common: repetitions: 1 timeout: null @@ -122824,19 +128778,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 4 run_ahead: 5 @@ -122844,9 +128798,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 275 + input_idx: 363 common: repetitions: 1 timeout: null @@ -122855,19 +128809,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 4 run_ahead: 5 @@ -122875,9 +128829,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 276 + input_idx: 364 common: repetitions: 1 timeout: null @@ -122886,19 +128840,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 4 run_ahead: 5 @@ -122906,9 +128860,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 277 + input_idx: 365 common: repetitions: 1 timeout: null @@ -122917,19 +128871,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 4 run_ahead: 5 @@ -122937,9 +128891,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 278 + input_idx: 366 common: repetitions: 1 timeout: null @@ -122948,19 +128902,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 4 run_ahead: 5 @@ -122968,9 +128922,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 279 + input_idx: 367 common: repetitions: 1 timeout: null @@ -122979,19 +128933,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 8 run_ahead: 5 @@ -122999,9 +128953,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 280 + input_idx: 368 common: repetitions: 1 timeout: null @@ -123010,19 +128964,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 8 run_ahead: 5 @@ -123030,9 +128984,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 281 + input_idx: 369 common: repetitions: 1 timeout: null @@ -123041,19 +128995,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 8 run_ahead: 5 @@ -123061,9 +129015,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 282 + input_idx: 370 common: repetitions: 1 timeout: null @@ -123072,19 +129026,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 8 run_ahead: 5 @@ -123092,9 +129046,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 283 + input_idx: 371 common: repetitions: 1 timeout: null @@ -123103,19 +129057,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 8 run_ahead: 5 @@ -123123,9 +129077,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 284 + input_idx: 372 common: repetitions: 1 timeout: null @@ -123134,19 +129088,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 8 run_ahead: 5 @@ -123154,9 +129108,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 285 + input_idx: 373 common: repetitions: 1 timeout: null @@ -123165,19 +129119,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 8 run_ahead: 5 @@ -123185,9 +129139,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 286 + input_idx: 374 common: repetitions: 1 timeout: null @@ -123196,19 +129150,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-40-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: deterministic threads: 8 run_ahead: 5 @@ -123216,9 +129170,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 287 + input_idx: 375 common: repetitions: 1 timeout: null @@ -123227,19 +129181,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-40-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 5 @@ -123247,9 +129201,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 288 + input_idx: 376 common: repetitions: 1 timeout: null @@ -123258,19 +129212,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 5 @@ -123278,9 +129232,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 289 + input_idx: 377 common: repetitions: 1 timeout: null @@ -123289,19 +129243,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 5 @@ -123309,9 +129263,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 290 + input_idx: 378 common: repetitions: 1 timeout: null @@ -123320,19 +129274,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 5 @@ -123340,9 +129294,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 291 + input_idx: 379 common: repetitions: 1 timeout: null @@ -123351,19 +129305,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 5 @@ -123371,9 +129325,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 292 + input_idx: 380 common: repetitions: 1 timeout: null @@ -123382,19 +129336,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 5 @@ -123402,9 +129356,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 293 + input_idx: 381 common: repetitions: 1 timeout: null @@ -123413,19 +129367,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 5 @@ -123433,9 +129387,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 294 + input_idx: 382 common: repetitions: 1 timeout: null @@ -123444,19 +129398,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 5 @@ -123464,9 +129418,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 295 + input_idx: 383 common: repetitions: 1 timeout: null @@ -123475,19 +129429,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 10 @@ -123495,9 +129449,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 296 + input_idx: 384 common: repetitions: 1 timeout: null @@ -123506,19 +129460,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 10 @@ -123526,9 +129480,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 297 + input_idx: 385 common: repetitions: 1 timeout: null @@ -123537,19 +129491,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 10 @@ -123557,9 +129511,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 298 + input_idx: 386 common: repetitions: 1 timeout: null @@ -123568,19 +129522,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 10 @@ -123588,9 +129542,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 299 + input_idx: 387 common: repetitions: 1 timeout: null @@ -123599,19 +129553,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 10 @@ -123619,9 +129573,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 300 + input_idx: 388 common: repetitions: 1 timeout: null @@ -123630,19 +129584,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 10 @@ -123650,9 +129604,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 301 + input_idx: 389 common: repetitions: 1 timeout: null @@ -123661,19 +129615,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 10 @@ -123681,9 +129635,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 302 + input_idx: 390 common: repetitions: 1 timeout: null @@ -123692,19 +129646,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 4 run_ahead: 10 @@ -123712,9 +129666,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 303 + input_idx: 391 common: repetitions: 1 timeout: null @@ -123723,19 +129677,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 5 @@ -123743,9 +129697,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 304 + input_idx: 392 common: repetitions: 1 timeout: null @@ -123754,19 +129708,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 5 @@ -123774,9 +129728,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 305 + input_idx: 393 common: repetitions: 1 timeout: null @@ -123785,19 +129739,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 5 @@ -123805,9 +129759,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 306 + input_idx: 394 common: repetitions: 1 timeout: null @@ -123816,19 +129770,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 5 @@ -123836,9 +129790,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 307 + input_idx: 395 common: repetitions: 1 timeout: null @@ -123847,19 +129801,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 5 @@ -123867,9 +129821,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 308 + input_idx: 396 common: repetitions: 1 timeout: null @@ -123878,19 +129832,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 5 @@ -123898,9 +129852,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 309 + input_idx: 397 common: repetitions: 1 timeout: null @@ -123909,19 +129863,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 5 @@ -123929,9 +129883,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 310 + input_idx: 398 common: repetitions: 1 timeout: null @@ -123940,19 +129894,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 5 @@ -123960,9 +129914,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 311 + input_idx: 399 common: repetitions: 1 timeout: null @@ -123971,19 +129925,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 10 @@ -123991,9 +129945,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 312 + input_idx: 400 common: repetitions: 1 timeout: null @@ -124002,19 +129956,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 10 @@ -124022,9 +129976,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 313 + input_idx: 401 common: repetitions: 1 timeout: null @@ -124033,19 +129987,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 10 @@ -124053,9 +130007,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 314 + input_idx: 402 common: repetitions: 1 timeout: null @@ -124064,19 +130018,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 10 @@ -124084,9 +130038,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 315 + input_idx: 403 common: repetitions: 1 timeout: null @@ -124095,19 +130049,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 10 @@ -124115,9 +130069,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 316 + input_idx: 404 common: repetitions: 1 timeout: null @@ -124126,19 +130080,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 10 @@ -124146,9 +130100,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 317 + input_idx: 405 common: repetitions: 1 timeout: null @@ -124157,19 +130111,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 10 @@ -124177,9 +130131,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 318 + input_idx: 406 common: repetitions: 1 timeout: null @@ -124188,19 +130142,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic threads: 8 run_ahead: 10 @@ -124208,9 +130162,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 319 + input_idx: 407 common: repetitions: 1 timeout: null @@ -124219,19 +130173,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-40-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -124239,9 +130193,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 320 + input_idx: 408 common: repetitions: 1 timeout: null @@ -124250,19 +130204,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -124270,9 +130224,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 321 + input_idx: 409 common: repetitions: 1 timeout: null @@ -124281,19 +130235,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -124301,9 +130255,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 322 + input_idx: 410 common: repetitions: 1 timeout: null @@ -124312,19 +130266,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -124332,9 +130286,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 323 + input_idx: 411 common: repetitions: 1 timeout: null @@ -124343,19 +130297,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -124363,9 +130317,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 324 + input_idx: 412 common: repetitions: 1 timeout: null @@ -124374,19 +130328,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -124394,9 +130348,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 325 + input_idx: 413 common: repetitions: 1 timeout: null @@ -124405,19 +130359,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -124425,9 +130379,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 326 + input_idx: 414 common: repetitions: 1 timeout: null @@ -124436,19 +130390,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 5 @@ -124456,9 +130410,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 327 + input_idx: 415 common: repetitions: 1 timeout: null @@ -124467,19 +130421,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-5-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -124487,9 +130441,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 328 + input_idx: 416 common: repetitions: 1 timeout: null @@ -124498,19 +130452,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -124518,9 +130472,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 329 + input_idx: 417 common: repetitions: 1 timeout: null @@ -124529,19 +130483,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -124549,9 +130503,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 330 + input_idx: 418 common: repetitions: 1 timeout: null @@ -124560,19 +130514,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -124580,9 +130534,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 331 + input_idx: 419 common: repetitions: 1 timeout: null @@ -124591,19 +130545,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -124611,9 +130565,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 332 + input_idx: 420 common: repetitions: 1 timeout: null @@ -124622,19 +130576,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -124642,9 +130596,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 333 + input_idx: 421 common: repetitions: 1 timeout: null @@ -124653,19 +130607,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -124673,9 +130627,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 334 + input_idx: 422 common: repetitions: 1 timeout: null @@ -124684,19 +130638,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-4 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 4 run_ahead: 10 @@ -124704,9 +130658,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 335 + input_idx: 423 common: repetitions: 1 timeout: null @@ -124715,19 +130669,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-4/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-10-threads-4/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -124735,9 +130689,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 336 + input_idx: 424 common: repetitions: 1 timeout: null @@ -124746,19 +130700,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -124766,9 +130720,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 337 + input_idx: 425 common: repetitions: 1 timeout: null @@ -124777,19 +130731,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -124797,9 +130751,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 338 + input_idx: 426 common: repetitions: 1 timeout: null @@ -124808,19 +130762,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -124828,9 +130782,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 339 + input_idx: 427 common: repetitions: 1 timeout: null @@ -124839,19 +130793,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -124859,9 +130813,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 340 + input_idx: 428 common: repetitions: 1 timeout: null @@ -124870,19 +130824,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -124890,9 +130844,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 341 + input_idx: 429 common: repetitions: 1 timeout: null @@ -124901,19 +130855,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -124921,9 +130875,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 342 + input_idx: 430 common: repetitions: 1 timeout: null @@ -124932,19 +130886,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 5 @@ -124952,9 +130906,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 343 + input_idx: 431 common: repetitions: 1 timeout: null @@ -124963,19 +130917,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-5-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-5-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -124983,9 +130937,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 344 + input_idx: 432 common: repetitions: 1 timeout: null @@ -124994,19 +130948,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -125014,9 +130968,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 345 + input_idx: 433 common: repetitions: 1 timeout: null @@ -125025,19 +130979,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -125045,9 +130999,9 @@ benchmarks: cores_per_cluster: 4 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 346 + input_idx: 434 common: repetitions: 1 timeout: null @@ -125056,19 +131010,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -125076,9 +131030,9 @@ benchmarks: cores_per_cluster: 4 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 347 + input_idx: 435 common: repetitions: 1 timeout: null @@ -125087,19 +131041,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -125107,9 +131061,9 @@ benchmarks: cores_per_cluster: 8 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 348 + input_idx: 436 common: repetitions: 1 timeout: null @@ -125118,19 +131072,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -125138,9 +131092,9 @@ benchmarks: cores_per_cluster: 8 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 349 + input_idx: 437 common: repetitions: 1 timeout: null @@ -125149,19 +131103,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-20-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -125169,9 +131123,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '256' + - '512' - '32' - input_idx: 350 + input_idx: 438 common: repetitions: 1 timeout: null @@ -125180,19 +131134,19 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true - name: matrixmul benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-8 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/matrixmul executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul values: dtype: 32 - rows: 256 + rows: 512 mode: nondeterministic_interleave threads: 8 run_ahead: 10 @@ -125200,9 +131154,9 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '256' + - '512' - '32' - input_idx: 351 + input_idx: 439 common: repetitions: 1 timeout: null @@ -125211,9 +131165,9 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-256-run_ahead-10-threads-8/sim - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic_interleave-num_clusters-40-rows-512-run_ahead-10-threads-8/sim + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: true transpose: @@ -152580,6 +158534,33 @@ benchmarks: stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/sim/exec-driven parallel: null l2_prefill: null + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-dtype-32-m-512-n-32-p-512 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 27 + common: + repetitions: 1 + timeout: null + concurrency: null + enabled: null + results_dir: /home/roman/dev/box/results + target: ExecDrivenSimulate + target_config: !ExecDrivenSimulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/sim/exec-driven + parallel: null + l2_prefill: null matrixmul: - name: matrixmul benchmark_idx: 2 @@ -152673,6 +158654,29 @@ benchmarks: stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/sim/exec-driven parallel: null l2_prefill: null + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-dtype-32-rows-512 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 512 + args: + - '512' + - '32' + input_idx: 4 + common: + repetitions: 1 + timeout: null + concurrency: null + enabled: null + results_dir: /home/roman/dev/box/results + target: ExecDrivenSimulate + target_config: !ExecDrivenSimulate + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/sim/exec-driven + parallel: null + l2_prefill: null transpose: - name: transpose benchmark_idx: 3 @@ -153976,6 +159980,36 @@ benchmarks: config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.config traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-sim + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-dtype-32-m-512-n-32-p-512 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 27 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: AccelsimSimulate + target_config: !AccelsimSimulate + trace_config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.trace.config + inter_config: /home/roman/dev/box/accelsim/gtx1080/config_fermi_islip.icnt + config_dir: /home/roman/dev/box/accelsim/gtx1080 + config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.config + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-sim matrixmul: - name: matrixmul benchmark_idx: 2 @@ -154081,6 +160115,32 @@ benchmarks: config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.config traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-sim + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-dtype-32-rows-512 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 512 + args: + - '512' + - '32' + input_idx: 4 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: AccelsimSimulate + target_config: !AccelsimSimulate + trace_config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.trace.config + inter_config: /home/roman/dev/box/accelsim/gtx1080/config_fermi_islip.icnt + config_dir: /home/roman/dev/box/accelsim/gtx1080 + config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.config + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-sim transpose: - name: transpose benchmark_idx: 3 @@ -155411,6 +161471,36 @@ benchmarks: config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.config traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/playground-sim + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-dtype-32-m-512-n-32-p-512 + path: /home/roman/dev/box/test-apps/simple_matrixmul + executable: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + values: + m: 512 + n: 32 + p: 512 + dtype: 32 + args: + - '512' + - '32' + - '512' + - '32' + input_idx: 27 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: PlaygroundSimulate + target_config: !PlaygroundSimulate + trace_config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.trace.config + inter_config: /home/roman/dev/box/accelsim/gtx1080/config_fermi_islip.icnt + config_dir: /home/roman/dev/box/accelsim/gtx1080 + config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.config + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/playground-sim matrixmul: - name: matrixmul benchmark_idx: 2 @@ -155516,6 +161606,32 @@ benchmarks: config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.config traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/playground-sim + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-dtype-32-rows-512 + path: /home/roman/dev/box/test-apps/matrixmul + executable: /home/roman/dev/box/test-apps/matrixmul/matrixmul + values: + dtype: 32 + rows: 512 + args: + - '512' + - '32' + input_idx: 4 + common: + repetitions: 1 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: PlaygroundSimulate + target_config: !PlaygroundSimulate + trace_config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.trace.config + inter_config: /home/roman/dev/box/accelsim/gtx1080/config_fermi_islip.icnt + config_dir: /home/roman/dev/box/accelsim/gtx1080 + config: /home/roman/dev/box/accelsim/gtx1080/gpgpusim.config + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/playground-sim transpose: - name: transpose benchmark_idx: 3 diff --git a/test-apps/test-apps.yml b/test-apps/test-apps.yml index ed5460bb..d46ee3ef 100644 --- a/test-apps/test-apps.yml +++ b/test-apps/test-apps.yml @@ -105,8 +105,9 @@ benchmarks: m: [32, 64, 128] n: [32, 64, 128] p: [32, 64, 128] - # include: - # - { m: 32, n: 1024, p: 32, dtype: 32 } + include: + - {m: 512, n: 32, p: 512, dtype: 32,} + # - { m: 32, n: 1024, p: 32, dtype: 32 } # - { m: 32, n: 2048, p: 32, dtype: 32 } # - { m: 32, n: 4096, p: 32, dtype: 32 } # (m x n) x (n x p) @@ -121,8 +122,8 @@ benchmarks: inputs: dtype: [32] rows: [32, 64, 128, 256, 512] - exclude: - - rows: 512 + # exclude: + # - rows: 512 # (rows x rows) x (rows x rows) args: "{{ input.rows }} {{ input.dtype }}" simulate: