Skip to content

Commit

Permalink
add experimental storage benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
cdump committed Dec 29, 2024
1 parent e97f4f3 commit ddd835c
Show file tree
Hide file tree
Showing 7 changed files with 963 additions and 9 deletions.
13 changes: 9 additions & 4 deletions benchmark/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,42 @@ PROVIDER_BASE = etherscan
PROVIDERS_SELECTORS ?= simple whatsabi evm-hound-rs sevm evmole-rs evmole-js evmole-py
PROVIDERS_ARGUMENTS ?= simple evmole-rs evmole-js evmole-py
PROVIDERS_MUTABILITY ?= simple whatsabi sevm evmole-rs evmole-js evmole-py
#heimdall-rs
PROVIDERS_STORAGE ?= evmole-rs smlxl

DATASETS ?= largest1k random50k vyper
DATASETS_STORAGE ?= storage3k
DOCKER ?= docker
DOCKER_CPUS ?= 1
DOCKER_PREFIX ?= evmole-bench

PROVIDERS_SELECTORS := $(PROVIDER_BASE) $(PROVIDERS_SELECTORS)
PROVIDERS_ARGUMENTS := $(PROVIDER_BASE) $(PROVIDERS_ARGUMENTS)
PROVIDERS_MUTABILITY := $(PROVIDER_BASE) $(PROVIDERS_MUTABILITY)
PROVIDERS_UNIQ := $(sort $(PROVIDERS_SELECTORS) $(PROVIDERS_ARGUMENTS) $(PROVIDERS_MUTABILITY))
PROVIDERS_STORAGE := $(PROVIDER_BASE) $(PROVIDERS_STORAGE)
PROVIDERS_UNIQ := $(sort $(PROVIDERS_SELECTORS) $(PROVIDERS_ARGUMENTS) $(PROVIDERS_MUTABILITY) $(PROVIDERS_STORAGE))

DATASET := $(shell pwd)/datasets
RES := $(shell pwd)/results

BUILD_TARGETS := $(addsuffix .build, $(PROVIDERS_UNIQ))
UNPACK_TARGETS := $(foreach d,$(DATASETS),$(addprefix datasets/, $(d)))
UNPACK_TARGETS := $(foreach d,$(DATASETS) $(DATASETS_STORAGE),$(addprefix datasets/, $(d)))
RUN_SELECTORS_TARGETS := $(foreach p,$(PROVIDERS_SELECTORS),$(addprefix $(p).selectors/, $(DATASETS)))
RUN_ARGUMENTS_TARGETS := $(foreach p,$(PROVIDERS_ARGUMENTS),$(addprefix $(p).arguments/, $(DATASETS)))
RUN_MUTABILITY_TARGETS := $(foreach p,$(PROVIDERS_MUTABILITY),$(addprefix $(p).mutability/, $(DATASETS)))
RUN_STORAGE_TARGETS := $(foreach p,$(PROVIDERS_STORAGE),$(addprefix $(p).storage/, $(DATASETS_STORAGE)))

RUN_TARGETS := $(RUN_SELECTORS_TARGETS) $(RUN_ARGUMENTS_TARGETS) $(RUN_MUTABILITY_TARGETS)
RUN_TARGETS := $(RUN_SELECTORS_TARGETS) $(RUN_ARGUMENTS_TARGETS) $(RUN_MUTABILITY_TARGETS) $(RUN_STORAGE_TARGETS)

benchmark-selectors: $(addsuffix .build, $(PROVIDERS_SELECTORS)) run-selectors
benchmark-arguments: $(addsuffix .build, $(PROVIDERS_ARGUMENTS)) run-arguments
benchmark-mutability: $(addsuffix .build, $(PROVIDERS_MUTABILITY)) run-mutability
benchmark-storage: $(addsuffix .build, $(PROVIDERS_STORAGE)) run-storage

build: $(BUILD_TARGETS)
run-selectors: $(RUN_SELECTORS_TARGETS)
run-arguments: $(RUN_ARGUMENTS_TARGETS)
run-mutability: $(RUN_MUTABILITY_TARGETS)
run-storage: $(RUN_STORAGE_TARGETS)

$(BUILD_TARGETS):
$(info [*] Building $(basename $@)...)
Expand Down
53 changes: 48 additions & 5 deletions benchmark/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,42 @@ def process_mutability(dname: str, providers: list[str], results_dir: str, stric
def process_arguments(dname: str, providers: list[str], results_dir: str, normalize_rules: set[str]):
    """
    Run the generic function comparison in 'arguments' mode.

    Every compared value is first passed through `normalize_args` together
    with the caller-supplied `normalize_rules`.
    """
    def normalize(x):
        # Bind the rule set so the comparison only has to supply the value.
        return normalize_args(x, normalize_rules)

    return process_functions('arguments', dname, providers, results_dir, normalize)

def process_storage(dname: str, providers: list[str], results_dir: str):
    """
    Compare storage-layout results of every provider against the ground truth.

    `providers[0]` is treated as the ground-truth provider; all remaining
    providers are scored against it, slot by slot.

    For each contract file the result holds one entry per slot:
      * slots present in the ground truth: a provider scores `[1]` when it
        reports the same type, otherwise `[0, reported_type]` (the reported
        type is None when the provider did not report the slot);
      * slots reported by at least one provider but absent from the ground
        truth (false positives, emitted in sorted order with 'gt' = None):
        a provider scores `[1]` when it did not report the slot, otherwise
        `[0, reported_type]`.

    Returns a dict with the dataset name, the per-contract results and the
    timings of the non-ground-truth providers.
    """
    pdata, ptimes = load_data('storage', dname, providers, results_dir)
    candidate_ids = range(1, len(providers))  # index 0 is the ground_truth provider

    results = []
    for fname, (_meta, gt) in pdata[0].items():
        # Slot -> type dictionaries reported by each non-ground-truth provider.
        reported = [pdata[idx][fname][1] for idx in candidate_ids]

        slots = []
        for gt_slot, gt_type in gt.items():
            row = []
            for layout in reported:
                found = layout.get(gt_slot)
                row.append([1] if found == gt_type else [0, found])
            slots.append({'s': gt_slot, 'gt': gt_type, 'data': row})

        # Slots that some provider reported although the ground truth lacks them.
        seen_slots = set()
        for layout in reported:
            seen_slots.update(layout.keys())
        unexpected = seen_slots.difference(pdata[0][fname][1].keys())

        for slot in sorted(unexpected):
            row = []
            for layout in reported:
                found = layout.get(slot)
                row.append([1] if found is None else [0, found])
            slots.append({'s': slot, 'gt': None, 'data': row})

        results.append({
            'addr': fname[2:-5],  # '0xFF.json' => 'FF'
            'func': slots,
        })

    return {'dataset': dname, 'results': results, 'timings': ptimes[1:]}

def show_arguments_or_mutability(providers: list[str], all_results: list, show_errors: bool):
for dataset_result in all_results:
cnt_contracts = len(dataset_result['results'])
Expand All @@ -244,7 +280,7 @@ def show_arguments_or_mutability(providers: list[str], all_results: list, show_e
if show_errors is not True:
continue
print(' errors:')
for x in dataset_result['results']:
for x in sorted(dataset_result['results'], key=lambda x:x['addr']):
for y in x['func']:
if len(y['data'][provider_idx]) > 1:
assert y['data'][provider_idx][0] == 0
Expand All @@ -260,22 +296,24 @@ def show_arguments_or_mutability(providers: list[str], all_results: list, show_e
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--results-dir', type=str, default=pathlib.Path(__file__).parent / 'results', help='results directory')
parser.add_argument('--mode', choices=['selectors', 'arguments', 'mutability'], default='selectors', help='mode')
parser.add_argument('--mode', choices=['selectors', 'arguments', 'mutability', 'storage'], default='selectors', help='mode')
parser.add_argument('--providers', nargs='+', default=None)
parser.add_argument('--datasets', nargs='+', default=['largest1k', 'random50k', 'vyper'])
parser.add_argument('--datasets', nargs='+', default=None)
parser.add_argument('--markdown', nargs='?', default=False, const=True, help='show markdown output')
parser.add_argument('--show-errors', nargs='?', default=False, const=True, help='show errors')
parser.add_argument('--normalize-args', nargs='+', required=False, choices=['fixed-size-array', 'tuples', 'string-bytes'], help='normalize arguments rules')
cfg = parser.parse_args()
if cfg.datasets is None:
cfg.datasets = ['storage3k'] if cfg.mode == 'storage' else ['largest1k', 'random50k', 'vyper']
if cfg.providers is None:
if cfg.mode == 'selectors':
cfg.providers = ['etherscan', 'evmole-rs', 'evmole-js', 'evmole-py', 'whatsabi', 'sevm', 'evm-hound-rs', 'heimdall-rs', 'simple']
elif cfg.mode == 'arguments':
cfg.providers = ['etherscan', 'evmole-rs', 'evmole-js', 'evmole-py', 'heimdall-rs', 'simple']
elif cfg.mode == 'mutability':
cfg.providers = ['etherscan', 'evmole-rs', 'evmole-js', 'evmole-py', 'whatsabi', 'sevm', 'heimdall-rs', 'simple']
else:
cfg.providers = []
elif cfg.mode == 'storage':
cfg.providers = ['etherscan', 'evmole-rs', 'smlxl']
print('Config:')
print('\n'.join(f' {field} = {getattr(cfg, field)}' for field in vars(cfg)), '\n')

Expand Down Expand Up @@ -306,3 +344,8 @@ def show_arguments_or_mutability(providers: list[str], all_results: list, show_e
x['dataset'] += '/strict'
results.append(x)
show_arguments_or_mutability(cfg.providers, results, cfg.show_errors)

elif cfg.mode == 'storage':
# results = [process_storage(d, cfg.providers, cfg.results_dir) for d in cfg.datasets]
results = [process_storage('storage3k', cfg.providers, cfg.results_dir)]
show_arguments_or_mutability(cfg.providers, results, cfg.show_errors)
98 changes: 98 additions & 0 deletions benchmark/providers/etherscan/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,105 @@ def join_inputs(inputs) -> str:
n += ','
return n[:-1]

def process_storage_mapping(types, k, v) -> str:
    """
    Render a mapping as a 'mapping(<key> => <value>)' type string.

    `types` maps type names to their resolved form: a str (plain label),
    a dict with a single {key_name: value_name} pair (mapping), a tuple of
    struct members, or a one-element list holding the base type name of a
    dynamic array. `k` and `v` are the type names of the mapping's key and
    value.

    Raises Exception when the value type has any other form.
    """
    key_type = types[k]
    value_type = types[v]

    if isinstance(value_type, str):
        rendered = value_type
    elif isinstance(value_type, dict):
        # Nested mapping: render the inner mapping recursively.
        assert len(value_type) == 1
        inner_key, inner_value = next(iter(value_type.items()))
        rendered = process_storage_mapping(types, inner_key, inner_value)
    elif isinstance(value_type, tuple):
        if len(value_type) == 1:
            # struct with only 1 field: behave as if the mapping pointed
            # straight at that field's type
            return process_storage_mapping(types, k, value_type[0]['type'])
        rendered = f'struct_{len(value_type)}_fields'
    elif isinstance(value_type, list):
        rendered = process_storage_dynarray(types, types[value_type[0]])
    else:
        raise Exception(f'Unsupported map type {key_type} / {value_type}')

    return f'mapping({key_type} => {rendered})'

def process_storage_dynarray(types, base) -> str:
    """
    Render a dynamic array whose element type is `base` as '<element>[]'.

    `base` is the already-resolved element type: a str (plain label), a tuple
    of struct member dicts, or a one-element list holding the base type name
    of a nested dynamic array. `types` maps type names to resolved forms and
    is used to look up struct members and nested array elements.

    Raises Exception when `base` has any other form.
    """
    if isinstance(base, str):
        return f'{base}[]'

    if isinstance(base, tuple):
        if len(base) == 1:
            # struct with only 1 field: treat the array as an array of that
            # field's type. The member is a dict, so its type name has to be
            # resolved through `types` first; passing the dict itself would
            # fall through to the "Unsupported" error below. No extra '[]' is
            # appended, otherwise the result would look like a nested array.
            return process_storage_dynarray(types, types[base[0]['type']])
        return f'struct_{len(base)}_fields[]'

    if isinstance(base, list):
        # Element is itself a dynamic array: render it, then add this level.
        return process_storage_dynarray(types, types[base[0]]) + '[]'

    raise Exception(f'Unsupported dynamic array base type {base}')

def process_storage_value(types, base_slot: int, offset, value) -> dict[str, str]:
    """
    Flatten one storage entry into a {'<slot as 64 hex digits>_<offset>': type} dict.

    `value` is the resolved type of the entry:
      * str   - stored as-is under this entry's key;
      * tuple - struct members; each member is flattened recursively, with
                its own slot added to `base_slot` (the struct itself must
                start at offset 0);
      * dict  - a single {key_name: value_name} mapping, rendered via
                `process_storage_mapping`;
      * list  - a dynamic array, rendered via `process_storage_dynarray`.

    Raises Exception for any other form of `value`.
    """
    slot_key = f'{base_slot:064x}_{offset}'

    if isinstance(value, str):
        return {slot_key: value}

    if isinstance(value, tuple):
        assert offset == 0
        flattened: dict[str, str] = {}
        for member in value:
            # Member slots are relative to the slot where the struct starts.
            flattened.update(
                process_storage_value(
                    types,
                    base_slot + int(member['slot']),
                    member['offset'],
                    types[member['type']],
                )
            )
        return flattened

    if isinstance(value, dict):
        assert len(value) == 1
        map_key, map_value = next(iter(value.items()))
        return {slot_key: process_storage_mapping(types, map_key, map_value)}

    if isinstance(value, list):
        element = types[value[0]]
        return {slot_key: process_storage_dynarray(types, element)}

    raise Exception(f'Unsupported value type {value}')

def process_storage(sl):
    """
    Experimental code, not 100% accurate benchmark

    Convert a storage-layout dict (with 'types' and 'storage' entries) into a
    flat {'<slot as 64 hex digits>_<offset>': type} dict.

    Each entry of `sl['types']` is first resolved by its 'encoding':
      * 'inplace'       - tuple of members when 'members' is present
                          (struct), otherwise the label;
      * 'mapping'       - {key_name: value_name};
      * 'bytes'         - the label;
      * 'dynamic_array' - [base_name].
    In string labels 'address payable' and 'contract <Name>' are rewritten to
    'address'. Any other encoding raises Exception.
    """
    resolved = {}
    for type_name, info in (sl['types'] or {}).items():
        encoding = info['encoding']
        if encoding == 'inplace':
            if 'members' in info:
                assert info['label'].startswith('struct')
                shape = tuple(info['members'])
            else:
                shape = info['label']
        elif encoding == 'mapping':
            shape = {info['key']: info['value']}
        elif encoding == 'bytes':
            shape = info['label']
        elif encoding == 'dynamic_array':
            shape = [info['base']]
        else:
            raise Exception(f'Unsupported type {info}')

        if isinstance(shape, str):
            # Collapse the different spellings of an address into one label.
            shape = shape.replace('address payable', 'address')
            shape = re.sub(r'contract \w+', 'address', shape)
        resolved[type_name] = shape

    layout = {}
    for entry in sl['storage']:
        layout.update(
            process_storage_value(resolved, int(entry['slot']), entry['offset'], resolved[entry['type']])
        )
    return layout

def process(data, mode):
if mode == 'storage':
return process_storage(data['storageLayout'])
ret = {}
for x in data['abi']:
if x['type'] != 'function':
Expand Down
Loading

0 comments on commit ddd835c

Please sign in to comment.