From 88d11cc579346f56ec2532fcc13cf4f344d0d4fe Mon Sep 17 00:00:00 2001 From: Maxim Andreev Date: Sat, 31 Aug 2024 07:33:29 +0000 Subject: [PATCH] Benchmark: add whatsabi state mutability results, improve results table UX --- README.md | 122 ++++++++++++++------------ benchmark/compare.py | 12 ++- benchmark/providers/whatsabi/main.mjs | 20 ++++- 3 files changed, 91 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index 2023ae5..df1b680 100644 --- a/README.md +++ b/README.md @@ -137,9 +137,9 @@ $ cast selectors --resolve $(cast code 0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc Time 0.4s · 0.8s · 0.6s 2.9s - 37.6s(*) + 38s(*) 0.5s - 341.3s(*) + 341s(*) 1.8s @@ -182,12 +182,12 @@ $ cast selectors --resolve $(cast code 0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc Time - 4.5s · 11.7s · 10.0s - 49.3s + 4.5s · 12s · 10s + 49s 1427s(*) 5.8s 8576s(*) - 49.2s + 49s @@ -231,9 +231,9 @@ $ cast selectors --resolve $(cast code 0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc Time 0.4s · 0.7s · 0.5s 2.2s - 60.0s(*) + 60s(*) 0.4s - 27.4s(*) + 27s(*) 1.1s @@ -247,32 +247,32 @@ $ cast selectors --resolve $(cast code 0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc evmole rs · js · py heimdall - simple + smpl largest1k
24427
functions
Errors - 14.0%, 3417 🥇 - 31.1%, 7593 - 58.3%, 14242 + 14.0% 🥇
3417 + 31.1%
7593 + 58.3%
14242 Time 1.0s · 8.3s · 3.5s - 341.6s(*) + 342s(*) 0.7s random50k
1171102
functions
Errors - 4.5%, 52777 🥇 - 19.4%, 227612 - 54.9%, 643213 + 4.5% 🥇
52777 + 19.4%
227612 + 54.9%
643213 Time - 22.9s · 262.7s · 103.8s + 23s · 263s · 104s 8544s(*) 9.7s @@ -280,14 +280,14 @@ $ cast selectors --resolve $(cast code 0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc vyper
21244
functions
Errors - 49.6%, 10544 🥇 - 100.0%, 21244 - 56.8%, 12077 + 49.6% 🥇
10544 + 100.0%
21244 + 56.8%
12077 Time 0.7s · 5.2s · 2.2s - 28.2s(*) + 28s(*) 0.5s @@ -303,83 +303,93 @@ $ cast selectors --resolve $(cast code 0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc Dataset evmole rs · js · py + whatsabi sevm heimdall - simple + smpl largest1k
24427
functions
Errors - 0.0%, 0 🥇 - 2.1%, 501 - 25.4%, 6201 - 2.6%, 643 + 0.0% 🥇
0 + 68.1%
16623 + 2.1%
501 + 25.4%
6201 + 2.6%
643 Errors strict - 19.3%, 4718 🥇 - 59.0%, 14417 - 54.9%, 13403 - 60.9%, 14864 + 19.3% 🥇
4718 + 79.3%
19370 + 59.0%
14417 + 54.9%
13403 + 60.9%
14864 Time - 7.9s · 17.3s · 10.1s - 37.4s(*) - 339.1s(*) + 7.9s · 17s · 10s + 3.7s + 37s(*) + 339s(*) 0.7s - + random50k
1160861
functions
Errors - 0.0%, 35 🥇 - 0.3%, 3887 - 11.6%, 134195 - 2.2%, 24961 + 0.0% 🥇
35 + 30.2%
351060 + 0.3%
3887 + 11.6%
134195 + 2.2%
24961 Errors strict - 6.8%, 78676 🥇 - 55.7%, 647070 - 27.7%, 321494 - 57.7%, 670318 + 6.8% 🥇
78676 + 58.1%
674922 + 55.7%
647070 + 27.7%
321494 + 57.7%
670318 Time - 225.8s · 523.1s · 309.1s - 1708.9s(*) - 8151.0s(*) + 226s · 523s · 309s + 80s + 1709s(*) + 8151s(*) 9.4s - + vyper
21166
functions
Errors - 0.5%, 110 🥇 - 77.8%, 16462 - 100.0%, 21166 - 1.8%, 390 + 0.5% 🥇
110 + 100.0%
21166 + 77.8%
16462 + 100.0%
21166 + 1.8%
390 Errors strict - 11.4%, 2410 🥇 - 91.0%, 19253 - 100.0%, 21166 - 59.6%, 12610 + 11.4% 🥇
2410 + 100.0%
21166 + 91.0%
19253 + 100.0%
21166 + 59.6%
12610 Time 3.7s · 8.7s · 5.1s - 59.3s(*) - 28.1s(*) + 2.2s + 59s(*) + 28s(*) 0.6s See [benchmark/README.md](./benchmark/) for the methodology and commands to reproduce these results -versions: evmole master (01e2a8d0); whatsabi v0.14.1; sevm v0.6.19; evm-hound-rs v0.1.4; heimdall-rs v0.8.4 +versions: evmole master (fc4274e4); whatsabi v0.14.1; sevm v0.6.19; evm-hound-rs v0.1.4; heimdall-rs v0.8.4 (*): sevm and heimdall-rs are full decompilers, not limited to extracting function selectors diff --git a/benchmark/compare.py b/benchmark/compare.py index 16b15bf..c7f06ac 100644 --- a/benchmark/compare.py +++ b/benchmark/compare.py @@ -35,6 +35,10 @@ def process_selectors(dname: str, providers: list[str], results_dir: str): return {'dataset': dname, 'results': ret, 'timings': ptimes[1:]} +def format_time(val: float) -> str: + return f'{val:.1f}s' if val < 10 else f'{val:.0f}s' + + def markdown_selectors(providers: list[str], all_results: list): print('') print(' ') @@ -75,7 +79,7 @@ def markdown_selectors(providers: list[str], all_results: list): print(' ') print(' ') for idx in range(0, len(providers) - 1): # skip ground_truth provider - print(f' ') + print(f' ') print(' ') if dataset_idx != len(all_results) - 1: print(f' ') @@ -97,19 +101,19 @@ def markdown_arguments_or_mutability(providers: list[str], all_results: list, se print(' ') for provider_idx in range(0, len(providers) - 1): # skip ground_truth provider bad_fn = sum(1 - y['data'][provider_idx][0] for x in dataset_result['results'] for y in x['func']) - print(f' ') + print(f' ') print(' ') if second_results is not None: print(' ') print(' ') for provider_idx in range(0, len(providers) - 1): # skip ground_truth provider bad_fn = sum(1 - y['data'][provider_idx][0] for x in second_results[dataset_idx]['results'] for y in x['func']) - print(f' ') + print(f' ') print(' ') print(' ') print(' ') for idx in range(0, len(providers) - 1): # skip ground_truth provider - print(f' ') + print(f' ') print(' ') if dataset_idx != len(all_results) - 1: print(f' ') diff --git a/benchmark/providers/whatsabi/main.mjs b/benchmark/providers/whatsabi/main.mjs index c08d2e6..38585b2 100644 --- a/benchmark/providers/whatsabi/main.mjs +++ b/benchmark/providers/whatsabi/main.mjs @@ -9,18 +9,32 @@ if (argv.length < 5) { } const mode = argv[2]; -if (mode != 'selectors') { - console.log('Only "selectors" mode supported, got ', mode) +if (mode != 'selectors' && mode != 'mutability') { + console.log('Only "selectors" and "mutability" modes are supported, got ', mode) process.exit(1) } const indir = argv[3]; const outfile = argv[4]; +const selectors = mode === 'selectors' ? {} : JSON.parse(readFileSync(argv[5])); + +function extract(code, mode, fname) { + if (mode == 'selectors') { + return whatsabi.selectorsFromBytecode(code).map(x => x.slice(2)); // remove '0x' prefix + } else { // mutability + const abi = whatsabi.abiFromBytecode(code); + const smut = Object.fromEntries(abi.filter((v) => v.type == 'function').map((v) => [v.selector, v.stateMutability])); + return Object.fromEntries(selectors[fname].map((s) => { + return [s, smut[`0x${s}`] || 'selnotfound']; + })); + } +} + const res = Object.fromEntries( readdirSync(indir).map( file => [ file, - whatsabi.selectorsFromBytecode(JSON.parse(readFileSync(`${indir}/${file}`))['code']).map(x => x.slice(2)) // remove '0x' prefix + extract(JSON.parse(readFileSync(`${indir}/${file}`))['code'], mode, file) ] ) );
Time{dataset_result["timings"][idx]:.1f}s{format_time(dataset_result["timings"][idx])}
Errors{(bad_fn*100/cnt_funcs):.1f}%, {bad_fn}{(bad_fn*100/cnt_funcs):.1f}%
{bad_fn}
Errors 2nd{(bad_fn*100/cnt_funcs):.1f}%, {bad_fn}{(bad_fn*100/cnt_funcs):.1f}%
{bad_fn}
Time{dataset_result["timings"][idx]:.1f}s{format_time(dataset_result["timings"][idx])}