diff --git a/benchmark/Makefile b/benchmark/Makefile
index cbcbac4..1028f34 100644
--- a/benchmark/Makefile
+++ b/benchmark/Makefile
@@ -1,6 +1,6 @@
 PROVIDER_BASE = etherscan
 PROVIDERS_SELECTORS ?= simple whatsabi evm-hound-rs evmole-py evmole-js
-PROVIDERS_ARGUMENTS ?= evmole-py
+PROVIDERS_ARGUMENTS ?= simple evmole-py evmole-js
 
 DATASETS ?= largest1k random50k vyper
 DOCKER ?= docker
diff --git a/benchmark/providers/evmole-js/main.mjs b/benchmark/providers/evmole-js/main.mjs
index f44a435..afc1a70 100644
--- a/benchmark/providers/evmole-js/main.mjs
+++ b/benchmark/providers/evmole-js/main.mjs
@@ -1,27 +1,35 @@
 import {readdirSync, readFileSync, writeFileSync} from 'fs'
-import {functionSelectors} from './js/src/index.js'
+import {functionArguments, functionSelectors} from './js/src/index.js'
 
 const argv = process.argv;
 if (argv.length < 5) {
-  console.log('Usage: node main.js MODE INPUT_DIR OUTPUT_FILE')
+  console.log('Usage: node main.js MODE INPUT_DIR OUTPUT_FILE [SELECTORS_FILE]')
   process.exit(1)
 }
 
+let selectors = {}
 const mode = argv[2];
-if (mode != 'selectors') {
-  console.log('Only "selectors" mode supported, got ', mode)
-  process.exit(1)
-}
 const indir = argv[3];
 const outfile = argv[4];
 
+if (mode === 'arguments') {
+  // SELECTORS_FILE is optional on the command line but mandatory in this mode.
+  if (argv.length < 6) {
+    console.log('"arguments" mode requires SELECTORS_FILE')
+    process.exit(1)
+  }
+  selectors = JSON.parse(readFileSync(argv[5]));
+}
+
 const res = Object.fromEntries(
-  readdirSync(indir).map(
-    file => [
-      file,
-      functionSelectors(JSON.parse(readFileSync(`${indir}/${file}`))['code'])
-    ]
+  readdirSync(indir).map((file) => {
+    const code = JSON.parse(readFileSync(`${indir}/${file}`))['code']
+    let r = mode === 'arguments'
+      ? Object.fromEntries(selectors[file].map((s) => [s, functionArguments(code, s)]))
+      : functionSelectors(code);
+    return [file, r];
+  }
   )
 );
 
 writeFileSync(outfile, JSON.stringify(res), 'utf8');
diff --git a/benchmark/providers/evmole-py/main.py b/benchmark/providers/evmole-py/main.py
index 6a09eda..334e0e0 100644
--- a/benchmark/providers/evmole-py/main.py
+++ b/benchmark/providers/evmole-py/main.py
@@ -14,6 +14,7 @@
 indir = sys.argv[2]
 outfile = sys.argv[3]
 
+selectors = {}
 if mode == 'arguments':
     selectors_file = sys.argv[4]
     with open(selectors_file, 'r') as fh:
diff --git a/benchmark/providers/simple/main.py b/benchmark/providers/simple/main.py
index 2df02be..a2960c1 100644
--- a/benchmark/providers/simple/main.py
+++ b/benchmark/providers/simple/main.py
@@ -2,7 +2,7 @@
 import os
 import sys
 
-def process(code: bytes) -> list[str]:
+def extract_selectors(code: bytes) -> list[str]:
     ret = []
     for i in range(len(code) - 5):
         # PUSH2/PUSH3
@@ -15,21 +15,39 @@ def process(code: bytes) -> list[str]:
     return [s.hex().zfill(8) for s in ret]
 
 
+def extract_arguments(code: bytes, selector: bytes) -> str:
+    """Baseline stub: always report an empty argument list."""
+    return ''
+
 if len(sys.argv) < 4:
-    print('Usage: python3 main.py MODE INPUT_DIR OUTPUT_FILE')
+    print('Usage: python3 main.py MODE INPUT_DIR OUTPUT_FILE [SELECTORS_FILE]')
     sys.exit(1)
-
 
 ret = {}
 
 mode = sys.argv[1]
-assert mode == 'selectors', f'only "selectors" mode supported, got {mode}'
 indir = sys.argv[2]
 outfile = sys.argv[3]
+
+selectors = {}
+if mode == 'arguments':
+    # SELECTORS_FILE is optional on the command line but mandatory in this mode.
+    if len(sys.argv) < 5:
+        print('"arguments" mode requires SELECTORS_FILE')
+        sys.exit(1)
+    selectors_file = sys.argv[4]
+    with open(selectors_file, 'r') as fh:
+        selectors = json.load(fh)
+
 for fname in os.listdir(indir):
     with open(f'{indir}/{fname}', 'r') as fh:
         d = json.load(fh)
-        ret[fname] = process(bytes.fromhex(d['code'][2:]))
+        code = bytes.fromhex(d['code'][2:])
+        if mode == 'arguments':
+            r = {s: extract_arguments(code, bytes.fromhex(s)) for s in selectors[fname]}
+        else:
+            r = extract_selectors(code)
+        ret[fname] = r
 
 with open(outfile, 'w') as fh:
     json.dump(ret, fh)
diff --git a/evmole/__init__.py b/evmole/__init__.py
index 6a5da6c..6ecfc98 100644
--- a/evmole/__init__.py
+++ b/evmole/__init__.py
@@ -1 +1,2 @@
 from .selectors import function_selectors
+from .arguments import function_arguments
diff --git a/evmole/arguments.py b/evmole/arguments.py
new file mode 100644
index 0000000..c6cd44c
--- /dev/null
+++ b/evmole/arguments.py
@@ -0,0 +1,165 @@
+from .utils import to_bytes
+from .evm.vm import Vm
+from .evm.opcodes import Op
+
+from .selectors import CallData
+
+
+class CallDataArgument(bytes):
+    """Stack word tagged with the calldata `offset` of the argument it derives from."""
+
+    offset: int
+    dynamic: bool
+
+    def __new__(cls, *, offset: int, dynamic: bool = False, val: bytes = b'\x00' * 32):
+        v = super().__new__(cls, val)
+        v.dynamic = dynamic
+        v.offset = offset
+        return v
+
+    def __repr__(self):
+        return f'arg({self.offset},{self.dynamic})'
+
+
+class CallDataArgumentDynamicLength(bytes):
+    """Stack word (value 1) pushed when a tagged argument word is used as a CALLDATALOAD offset."""
+
+    offset: int
+
+    def __new__(cls, *, offset: int):
+        v = super().__new__(cls, (1).to_bytes(32, 'big'))
+        v.offset = offset
+        return v
+
+    def __repr__(self):
+        return f'dlen({self.offset})'
+
+
+class CallDataArgumentDynamic(bytes):
+    """Stack word pushed when a value other than 4 is added to a tagged argument word."""
+
+    offset: int
+
+    def __new__(cls, *, offset: int, val: bytes = b'\x00' * 32):
+        v = super().__new__(cls, val)
+        v.offset = offset
+        return v
+
+    def __repr__(self):
+        return f'darg({self.offset})'
+
+
+def function_arguments(code: bytes | str, selector: bytes | str, gas_limit: int = int(1e4)) -> str:
+    """Guess the argument types of the function selected by `selector`.
+
+    The bytecode is stepped through a `Vm` whose calldata holds the selector.
+    Once the dispatcher comparison against the selector succeeds, words loaded
+    from calldata are tagged, and the operations applied to them (masks, sign
+    extension, ISZERO, length scaling) decide the guessed type.
+
+    Args:
+        code: contract bytecode, converted with `to_bytes`.
+        selector: function selector, converted with `to_bytes`.
+        gas_limit: analysis stops once the accumulated gas exceeds this value.
+
+    Returns:
+        Guessed types joined with ',' and ordered by calldata offset; an empty
+        string if nothing was detected.
+    """
+    bytes_selector = to_bytes(selector)
+    vm = Vm(code=to_bytes(code), calldata=CallData(bytes_selector))
+    gas_used = 0
+    inside_function = False
+    args: dict[int, str] = {}
+    blacklisted_ops: set[Op] = set()
+    while not vm.stopped:
+        try:
+            ret = vm.step(blacklisted_ops)
+        except Exception:
+            # Best effort: a VM failure ends the analysis with what was collected.
+            break
+        gas_used += ret[1]
+        if gas_used > gas_limit:
+            break
+
+        if not inside_function:
+            # Still in the dispatcher: wait for a successful selector comparison.
+            if ret[0] in {Op.EQ, Op.XOR, Op.SUB}:
+                p = int.from_bytes(vm.stack.peek(), 'big')
+                if p == (1 if ret[0] == Op.EQ else 0):
+                    inside_function = bytes(ret[2]).endswith(bytes_selector)
+            continue
+
+        match ret:
+            case (Op.CALLDATASIZE, _):
+                vm.stack.pop()
+                vm.stack.push_uint(8192)
+
+            case (Op.CALLDATALOAD, _, CallDataArgument() as arg):
+                args[arg.offset] = 'bytes'
+                vm.stack.pop()
+                v = CallDataArgumentDynamicLength(offset=arg.offset)
+                vm.stack.push(v)
+
+            case (Op.CALLDATALOAD, _, CallDataArgumentDynamic() as arg):
+                vm.stack.pop()
+                v = CallDataArgument(offset=arg.offset, dynamic=True)
+                vm.stack.push(v)
+
+            case (Op.CALLDATALOAD, _, bytes() as offset):
+                off = int.from_bytes(offset, 'big')
+                if off >= 4:
+                    vm.stack.pop()
+                    vm.stack.push(CallDataArgument(offset=off))
+                    args[off] = 'uint256'
+
+            case (Op.ADD, _, CallDataArgument() as cd, bytes() as ot) | (Op.ADD, _, bytes() as ot, CallDataArgument() as cd):
+                v = vm.stack.pop()
+                if int.from_bytes(ot, 'big') == 4:
+                    vm.stack.push(CallDataArgument(offset=cd.offset, val=v))
+                else:
+                    vm.stack.push(CallDataArgumentDynamic(offset=cd.offset))
+
+            case (Op.ADD, _, CallDataArgumentDynamic() as cd, _) | (Op.ADD, _, _, CallDataArgumentDynamic() as cd):
+                v = vm.stack.pop()
+                v = CallDataArgumentDynamic(offset=cd.offset, val=v)
+                vm.stack.push(v)
+
+            case (Op.SHL, _, bytes() as ot, CallDataArgumentDynamicLength() as arg) if int.from_bytes(ot, 'big') == 5:
+                args[arg.offset] = 'uint256[]'
+
+            # fmt: off
+            case (Op.MUL, _, CallDataArgumentDynamicLength() as arg, bytes() as ot) | \
+                 (Op.MUL, _, bytes() as ot, CallDataArgumentDynamicLength() as arg) if int.from_bytes(ot, 'big') == 32:
+            # fmt: on
+                args[arg.offset] = 'uint256[]'
+
+            case (Op.AND, _, CallDataArgument() as arg, bytes() as ot) | (Op.AND, _, bytes() as ot, CallDataArgument() as arg):
+                # Only whole-byte masks map to a type name.
+                # 0x0000ffff
+                v = int.from_bytes(ot, 'big')
+                if (v & (v + 1)) == 0:
+                    bl = v.bit_length()
+                    if bl > 0 and bl % 8 == 0:
+                        t = 'address' if bl == 160 else f'uint{bl}'
+                        args[arg.offset] = f'{t}[]' if arg.dynamic else t
+                else:
+                    # 0xffff0000
+                    v = int.from_bytes(ot, 'little')
+                    if (v & (v + 1)) == 0 and v.bit_length() % 8 == 0:
+                        t = f'bytes{v.bit_length() // 8}'
+                        args[arg.offset] = f'{t}[]' if arg.dynamic else t
+
+            case (Op.ISZERO, _, CallDataArgument() as arg):
+                args[arg.offset] = 'bool[]' if arg.dynamic else 'bool'
+
+            case (Op.SIGNEXTEND, _, s0, CallDataArgument() as arg):
+                # SIGNEXTEND is a no-op for byte indexes >= 32, so only smaller indexes
+                # describe a signed integer width. Assumes s0 is an int -- TODO confirm.
+                if s0 < 32:
+                    t = f'int{(s0+1)*8}'
+                    args[arg.offset] = f'{t}[]' if arg.dynamic else t
+
+            # NOTE: enum detection (LT on an argument -> 'uint8') is currently disabled.
+
+    return ','.join(v[1] for v in sorted(args.items()))
diff --git a/js/src/arguments.js b/js/src/arguments.js
new file mode 100644
index 0000000..2d9d544
--- /dev/null
+++ b/js/src/arguments.js
@@ -0,0 +1,242 @@
+import Op from './evm/opcodes.js'
+import Vm from './evm/vm.js'
+import {
+  hexToUint8Array,
+  bigIntToUint8Array,
+  uint8ArrayToBigInt,
+  bigIntBitLength,
+} from './utils.js'
+import { CallData } from './selectors.js'
+
+// Stack word tagged with the calldata offset of the argument it derives from.
+class CallDataArgument extends Uint8Array {
+  constructor(offset, dynamic = false, val) {
+    const v = super(val !== undefined ? val : new Uint8Array(32))
+    v.offset = offset
+    v.dynamic = dynamic
+    return v
+  }
+  toBigInt() {
+    return uint8ArrayToBigInt(this)
+  }
+}
+
+// Stack word (value 1) pushed when a tagged argument word is used as a CALLDATALOAD offset.
+class CallDataArgumentDynamicLength extends Uint8Array {
+  constructor(offset) {
+    const v = super(bigIntToUint8Array(1n))
+    v.offset = offset
+    return v
+  }
+  toBigInt() {
+    return uint8ArrayToBigInt(this)
+  }
+}
+
+// Stack word pushed when a value other than 4 is added to a tagged argument word.
+class CallDataArgumentDynamic extends Uint8Array {
+  constructor(offset, val = new Uint8Array(32)) {
+    const v = super(val)
+    v.offset = offset
+    return v
+  }
+  toBigInt() {
+    return uint8ArrayToBigInt(this)
+  }
+}
+
+/**
+ * Guess the argument types of the function selected by `selector_hex_string`.
+ *
+ * @param code_hex_string - contract bytecode, decoded with hexToUint8Array
+ * @param selector_hex_string - function selector, decoded with hexToUint8Array
+ * @param gas_limit - analysis stops once the accumulated gas exceeds this value
+ * @returns {string} guessed types joined with ',' and ordered by calldata offset
+ */
+export function functionArguments(
+  code_hex_string,
+  selector_hex_string,
+  gas_limit = 1e4,
+) {
+  const code = hexToUint8Array(code_hex_string)
+  const selector = hexToUint8Array(selector_hex_string)
+  const vm = new Vm(code, new CallData(selector))
+
+  let gas_used = 0
+  let inside_function = false
+  let args = {}
+  const blacklisted_ops = new Set([])
+
+  while (!vm.stopped) {
+    let ret
+    try {
+      ret = vm.step(blacklisted_ops)
+    } catch (err) {
+      // Best effort: a VM failure ends the analysis with what was collected.
+      break
+    }
+    gas_used += ret[1]
+    if (gas_used > gas_limit) {
+      break
+    }
+    const op = ret[0]
+
+    if (!inside_function) {
+      // Still in the dispatcher: wait for a successful selector comparison.
+      if (op === Op.EQ || op === Op.XOR || op === Op.SUB) {
+        const p = vm.stack.peek()[31]
+        if (p === (op === Op.EQ ? 1 : 0)) {
+          const a = ret[2].slice(-4)
+          inside_function = selector.every((v, i) => v === a[i])
+        }
+      }
+
+      continue
+    }
+
+    switch (op) {
+      case Op.CALLDATASIZE:
+        vm.stack.pop()
+        vm.stack.push_uint(8192n)
+        break
+
+      case Op.CALLDATALOAD:
+        {
+          const arg = ret[2]
+          if (arg instanceof CallDataArgument) {
+            args[arg.offset] = 'bytes'
+            vm.stack.pop()
+            vm.stack.push(new CallDataArgumentDynamicLength(arg.offset))
+          } else if (arg instanceof CallDataArgumentDynamic) {
+            vm.stack.pop()
+            vm.stack.push(new CallDataArgument(arg.offset, true))
+          } else {
+            const off = uint8ArrayToBigInt(arg)
+            if (off >= 4n) {
+              // One Number key for both the tag and the result map.
+              const offset = Number(off)
+              vm.stack.pop()
+              vm.stack.push(new CallDataArgument(offset))
+              args[offset] = 'uint256'
+            }
+          }
+        }
+        break
+
+      case Op.ADD:
+        {
+          const [r2, r3] = [ret[2], ret[3]]
+          if (
+            r2 instanceof CallDataArgument ||
+            r3 instanceof CallDataArgument
+          ) {
+            const [arg, ot] =
+              r2 instanceof CallDataArgument ? [r2, r3] : [r3, r2]
+            const v = vm.stack.pop()
+            if (uint8ArrayToBigInt(ot) === 4n) {
+              vm.stack.push(new CallDataArgument(arg.offset, false, v))
+            } else {
+              vm.stack.push(new CallDataArgumentDynamic(arg.offset))
+            }
+          } else if (
+            r2 instanceof CallDataArgumentDynamic ||
+            r3 instanceof CallDataArgumentDynamic
+          ) {
+            // `else if`: the result word is rewritten by at most one branch,
+            // as in the Python implementation's first-match `match`.
+            const v = vm.stack.pop()
+            const arg = r2 instanceof CallDataArgumentDynamic ? r2 : r3
+            vm.stack.push(new CallDataArgumentDynamic(arg.offset, v))
+          }
+        }
+        break
+
+      case Op.SHL:
+        {
+          const [r2, arg] = [uint8ArrayToBigInt(ret[2]), ret[3]]
+          if (r2 == 5n && arg instanceof CallDataArgumentDynamicLength) {
+            args[arg.offset] = 'uint256[]'
+          }
+        }
+        break
+
+      case Op.MUL:
+        {
+          if (
+            ret[3] instanceof CallDataArgumentDynamicLength &&
+            uint8ArrayToBigInt(ret[2]) == 32n
+          ) {
+            args[ret[3].offset] = 'uint256[]'
+          }
+
+          if (
+            ret[2] instanceof CallDataArgumentDynamicLength &&
+            uint8ArrayToBigInt(ret[3]) == 32n
+          ) {
+            args[ret[2].offset] = 'uint256[]'
+          }
+        }
+        break
+
+      case Op.AND:
+        {
+          const [r2, r3] = [ret[2], ret[3]]
+          if (
+            r2 instanceof CallDataArgument ||
+            r3 instanceof CallDataArgument
+          ) {
+            const [arg, ot] =
+              r2 instanceof CallDataArgument ? [r2, r3] : [r3, r2]
+
+            // Only whole-byte masks map to a type name.
+            const v = uint8ArrayToBigInt(ot)
+            if ((v & (v + 1n)) === 0n) {
+              // 0x0000ffff
+              const bl = bigIntBitLength(v)
+              if (bl > 0 && bl % 8 === 0) {
+                const t = bl === 160 ? 'address' : `uint${bl}`
+                args[arg.offset] = arg.dynamic ? `${t}[]` : t
+              }
+            } else {
+              // 0xffff0000
+              const v = BigInt(uint8ArrayToBigInt(ot.slice().reverse()))
+              const bl = bigIntBitLength(v)
+              if ((v & (v + 1n)) === 0n && bl % 8 === 0) {
+                const t = `bytes${Math.floor(bl / 8)}`
+                args[arg.offset] = arg.dynamic ? `${t}[]` : t
+              }
+            }
+          }
+        }
+        break
+
+      case Op.ISZERO:
+        {
+          const arg = ret[2]
+          if (arg instanceof CallDataArgument) {
+            args[arg.offset] = arg.dynamic ? 'bool[]' : 'bool'
+          }
+        }
+        break
+
+      case Op.SIGNEXTEND:
+        {
+          const arg = ret[3]
+          // SIGNEXTEND is a no-op for byte indexes >= 32, so only smaller
+          // indexes describe a signed integer width.
+          const s0 = Number(ret[2])
+          if (arg instanceof CallDataArgument && s0 < 32) {
+            const t = `int${(s0 + 1) * 8}`
+            args[arg.offset] = arg.dynamic ? `${t}[]` : t
+          }
+        }
+        break
+    }
+  }
+
+  const collator = new Intl.Collator([], { numeric: true })
+  return Object.entries(args)
+    .sort((a, b) => collator.compare(a[0], b[0]))
+    .map((v) => v[1])
+    .join(',')
+}
diff --git a/js/src/index.js b/js/src/index.js
index 8c6a949..a7b8d8a 100644
--- a/js/src/index.js
+++ b/js/src/index.js
@@ -1 +1,2 @@
 export {functionSelectors} from './selectors.js'
+export {functionArguments} from './arguments.js'