Skip to content

Commit

Permalink
add function arguments extractor
Browse files Browse the repository at this point in the history
  • Loading branch information
cdump committed Dec 13, 2023
1 parent 689ef5a commit c96e550
Show file tree
Hide file tree
Showing 8 changed files with 411 additions and 17 deletions.
2 changes: 1 addition & 1 deletion benchmark/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
PROVIDER_BASE = etherscan
PROVIDERS_SELECTORS ?= simple whatsabi evm-hound-rs evmole-py evmole-js
PROVIDERS_ARGUMENTS ?= evmole-py
PROVIDERS_ARGUMENTS ?= simple evmole-py evmole-js

DATASETS ?= largest1k random50k vyper
DOCKER ?= docker
Expand Down
25 changes: 14 additions & 11 deletions benchmark/providers/evmole-js/main.mjs
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
import {readdirSync, readFileSync, writeFileSync} from 'fs'

import {functionSelectors} from './js/src/index.js'
import {functionArguments, functionSelectors} from './js/src/index.js'

const argv = process.argv;
if (argv.length < 5) {
console.log('Usage: node main.js MODE INPUT_DIR OUTPUT_FILE')
console.log('Usage: node main.js MODE INPUT_DIR OUTPUT_FILE [SELCTORS_FILE]')
process.exit(1)
}

let selectors = {}
const mode = argv[2];
if (mode != 'selectors') {
console.log('Only "selectors" mode supported, got ', mode)
process.exit(1)
}
const indir = argv[3];
const outfile = argv[4];

if (mode === 'arguments') {
selectors = JSON.parse(readFileSync(argv[5]));
}

const res = Object.fromEntries(
readdirSync(indir).map(
file => [
file,
functionSelectors(JSON.parse(readFileSync(`${indir}/${file}`))['code'])
]
readdirSync(indir).map((file) => {
const code = JSON.parse(readFileSync(`${indir}/${file}`))['code']
let r = mode === 'arguments'
? Object.fromEntries(selectors[file].map((s) => [s, functionArguments(code, s)]))
: functionSelectors(code);
return [file, r];
}
)
);
writeFileSync(outfile, JSON.stringify(res), 'utf8');
1 change: 1 addition & 0 deletions benchmark/providers/evmole-py/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
indir = sys.argv[2]
outfile = sys.argv[3]

selectors = {}
if mode == 'arguments':
selectors_file = sys.argv[4]
with open(selectors_file, 'r') as fh:
Expand Down
23 changes: 18 additions & 5 deletions benchmark/providers/simple/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import sys

def process(code: bytes) -> list[str]:
def extract_selectors(code: bytes) -> list[str]:
ret = []
for i in range(len(code) - 5):
# PUSH2/PUSH3
Expand All @@ -15,21 +15,34 @@ def process(code: bytes) -> list[str]:

return [s.hex().zfill(8) for s in ret]

def extract_arguments(code: bytes, selector: bytes) -> str:
    """Return the argument signature for ``selector`` in ``code``.

    This provider does not analyse arguments: both parameters are ignored
    and the result is always the empty string.
    """
    no_arguments = ''
    return no_arguments


if len(sys.argv) < 4:
print('Usage: python3 main.py MODE INPUT_DIR OUTPUT_FILE')
print('Usage: python3 main.py MODE INPUT_DIR OUTPUT_FILE [SELECTORS_FILE]')
sys.exit(1)


ret = {}
mode = sys.argv[1]
assert mode == 'selectors', f'only "selectors" mode supported, got {mode}'
indir = sys.argv[2]
outfile = sys.argv[3]

selectors = {}
if mode == 'arguments':
selectors_file = sys.argv[4]
with open(selectors_file, 'r') as fh:
selectors = json.load(fh)

for fname in os.listdir(indir):
with open(f'{indir}/{fname}', 'r') as fh:
d = json.load(fh)
ret[fname] = process(bytes.fromhex(d['code'][2:]))
code = bytes.fromhex(d['code'][2:])
if mode == 'arguments':
r = {s: extract_arguments(code, bytes.fromhex(s)) for s in selectors[fname]}
else:
r = extract_selectors(code)
ret[fname] = r

with open(outfile, 'w') as fh:
json.dump(ret, fh)
1 change: 1 addition & 0 deletions evmole/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .selectors import function_selectors
from .arguments import function_arguments
147 changes: 147 additions & 0 deletions evmole/arguments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
from .utils import to_bytes
from .evm.vm import Vm
from .evm.opcodes import Op

from .selectors import CallData


class CallDataArgument(bytes):
    """A ``bytes`` value tagged with the calldata argument it belongs to.

    The instance behaves like ordinary ``bytes`` (32 zero bytes unless ``val``
    is supplied) and additionally carries two attributes:

    * ``offset``  - the argument offset recorded by the creator.
    * ``dynamic`` - flag recorded by the creator; ``False`` by default.
    """

    offset: int
    dynamic: bool

    def __new__(cls, *, offset: int, dynamic: bool = False, val: bytes = b'\x00' * 32):
        # bytes is immutable, so the payload has to be fixed in __new__;
        # the tags are attached to the freshly built instance afterwards.
        tagged = super().__new__(cls, val)
        tagged.offset, tagged.dynamic = offset, dynamic
        return tagged

    def __repr__(self):
        return 'arg({},{})'.format(self.offset, self.dynamic)


class CallDataArgumentDynamicLength(bytes):
    """A ``bytes`` value tagged with the offset of the argument it relates to.

    The payload is always the integer 1 encoded as 32 big-endian bytes; the
    ``offset`` attribute stores the argument offset given by the creator.
    """

    offset: int

    def __new__(cls, *, offset: int):
        one_word = int.to_bytes(1, 32, 'big')
        tagged = super().__new__(cls, one_word)
        tagged.offset = offset
        return tagged

    def __repr__(self):
        return 'dlen({})'.format(self.offset)


class CallDataArgumentDynamic(bytes):
    """A ``bytes`` value tagged with the offset of the argument it relates to.

    The payload defaults to 32 zero bytes and can be overridden with ``val``;
    the ``offset`` attribute stores the argument offset given by the creator.
    """

    offset: int

    def __new__(cls, *, offset: int, val: bytes = b'\x00' * 32):
        tagged = super().__new__(cls, val)
        tagged.offset = offset
        return tagged

    def __repr__(self):
        return 'darg({})'.format(self.offset)


def function_arguments(code: bytes | str, selector: bytes | str, gas_limit: int = int(1e4)) -> str:
bytes_selector = to_bytes(selector)
vm = Vm(code=to_bytes(code), calldata=CallData(bytes_selector))
gas_used = 0
inside_function = False
args: dict[int, str] = {}
blacklisted_ops: set[Op] = set()
while not vm.stopped:
try:
ret = vm.step(blacklisted_ops)
gas_used += ret[1]
if gas_used > gas_limit:
raise Exception(f'gas overflow: {gas_used} > {gas_limit}')

if inside_function:
# print(vm, '\n')
# print(ret)
pass
except Exception as ex:
_ = ex
# print(ex)
# raise ex
break

if inside_function is False:
if ret[0] in {Op.EQ, Op.XOR, Op.SUB}:
p = int.from_bytes(vm.stack.peek(), 'big')
if p == (1 if ret[0] == Op.EQ else 0):
inside_function = bytes(ret[2]).endswith(bytes_selector)
continue

# print(ret)
match ret:
case (Op.CALLDATASIZE, _):
vm.stack.pop()
vm.stack.push_uint(8192)

case (Op.CALLDATALOAD, _, CallDataArgument() as arg):
args[arg.offset] = 'bytes'
vm.stack.pop()
v = CallDataArgumentDynamicLength(offset=arg.offset)
vm.stack.push(v)

case (Op.CALLDATALOAD, _, CallDataArgumentDynamic() as arg):
vm.stack.pop()
v = CallDataArgument(offset=arg.offset, dynamic=True)
vm.stack.push(v)

case (Op.CALLDATALOAD, _, bytes() as offset):
off = int.from_bytes(offset, 'big')
if off >= 4:
vm.stack.pop()
vm.stack.push(CallDataArgument(offset=off))
args[off] = 'uint256'

case (Op.ADD, _, CallDataArgument() as cd, bytes() as ot) | (Op.ADD, _, bytes() as ot, CallDataArgument() as cd):
v = vm.stack.pop()
if int.from_bytes(ot, 'big') == 4:
vm.stack.push(CallDataArgument(offset=cd.offset, val=v))
else:
vm.stack.push(CallDataArgumentDynamic(offset=cd.offset))

case (Op.ADD, _, CallDataArgumentDynamic() as cd, _) | (Op.ADD, _, _, CallDataArgumentDynamic() as cd):
v = vm.stack.pop()
v = CallDataArgumentDynamic(offset=cd.offset, val=v)
vm.stack.push(v)

case (Op.SHL, _, bytes() as ot, CallDataArgumentDynamicLength() as arg) if int.from_bytes(ot, 'big') == 5:
args[arg.offset] = 'uint256[]'

# fmt: off
case (Op.MUL, _, CallDataArgumentDynamicLength() as arg, bytes() as ot) | \
(Op.MUL, _, bytes() as ot, CallDataArgumentDynamicLength() as arg) if int.from_bytes(ot, 'big') == 32:
# fmt: on
args[arg.offset] = 'uint256[]'

case (Op.AND, _, CallDataArgument() as arg, bytes() as ot) | (Op.AND, _, bytes() as ot, CallDataArgument() as arg):
# 0x0000ffff
v = int.from_bytes(ot, 'big')
if (v & (v + 1)) == 0:
bl = v.bit_length()
t = 'address' if bl == 160 else f'uint{bl}'
args[arg.offset] = f'{t}[]' if arg.dynamic else t
else:
# 0xffff0000
v = int.from_bytes(ot, 'little')
if (v & (v + 1)) == 0:
bl = v.bit_length() // 8
t = f'bytes{bl}'
args[arg.offset] = f'{t}[]' if arg.dynamic else t

case (Op.ISZERO, _, CallDataArgument() as arg):
args[arg.offset] = 'bool[]' if arg.dynamic else 'bool'

case (Op.SIGNEXTEND, _, s0, CallDataArgument() as arg):
t = f'int{(s0+1)*8}'
args[arg.offset] = f'{t}[]' if arg.dynamic else t

# case (Op.LT, _, CallDataArgument() as arg, _):
# args[arg.offset] = 'uint8' # enum

return ','.join(v[1] for v in sorted(args.items()))
Loading

0 comments on commit c96e550

Please sign in to comment.