Skip to content

Commit

Permalink
Typehoon: Support nested global structs. (angr#3261)
Browse files Browse the repository at this point in the history
* Typehoon: Support nested global structs.

* Lint.
  • Loading branch information
ltfish authored Mar 30, 2022
1 parent 5022886 commit d6711a9
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 71 deletions.
11 changes: 6 additions & 5 deletions angr/analyses/decompiler/clinic.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,8 @@ def _recover_and_link_variables(self, ail_graph, arg_list):
for variable in var_manager.variables_with_manual_types:
vartype = var_manager.types.get(variable, None)
if vartype is not None:
groundtruth[vr.var_to_typevar[variable]] = vartype
for tv in vr.var_to_typevars[variable]:
groundtruth[tv] = vartype
# clean up existing types for this function
var_manager.remove_types()
# TODO: Type inference for global variables
Expand All @@ -659,12 +660,12 @@ def _recover_and_link_variables(self, ail_graph, arg_list):
else:
must_struct = None
try:
tp = self.project.analyses.Typehoon(vr.type_constraints, kb=tmp_kb, var_mapping=vr.var_to_typevar,
tp = self.project.analyses.Typehoon(vr.type_constraints, kb=tmp_kb, var_mapping=vr.var_to_typevars,
must_struct=must_struct, ground_truth=groundtruth)
# tp.pp_constraints()
# tp.pp_solution()
tp.update_variable_types(self.function.addr, vr.var_to_typevar)
tp.update_variable_types('global', vr.var_to_typevar)
tp.update_variable_types(self.function.addr, vr.var_to_typevars)
tp.update_variable_types('global', vr.var_to_typevars)
except Exception: # pylint:disable=broad-except
l.warning("Typehoon analysis failed. Variables will not have types. Please report to GitHub.",
exc_info=True)
Expand All @@ -690,7 +691,7 @@ def _recover_and_link_variables(self, ail_graph, arg_list):

if self._cache is not None:
self._cache.type_constraints = vr.type_constraints
self._cache.var_to_typevar = vr.var_to_typevar
self._cache.var_to_typevars = vr.var_to_typevars

return tmp_kb

Expand Down
12 changes: 10 additions & 2 deletions angr/analyses/decompiler/structured_codegen/c.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,7 @@ class CStructField(CExpression):

__slots__ = ('struct_type', 'offset', 'field', 'tags', )

def __init__(self, struct_type, offset, field, tags=None, **kwargs):
def __init__(self, struct_type: SimStruct, offset, field, tags=None, **kwargs):

super().__init__(**kwargs)

Expand All @@ -978,7 +978,7 @@ def __init__(self, struct_type, offset, field, tags=None, **kwargs):

@property
def type(self):
return self.struct_type
return self.struct_type.fields[self.field]

def c_repr_chunks(self, indent=0, asexpr=False):
yield str(self.field), self
Expand Down Expand Up @@ -1184,6 +1184,14 @@ def __init__(self, variable: CExpression, index: CExpression, variable_type=None
self._type = variable_type
self.tags = tags

if self._type is None and isinstance(self.variable, (CVariable, CIndexedVariable, CVariableField)) \
and self.variable.type is not None:
u = unpack_typeref(self.variable.type)
if isinstance(u, SimTypePointer):
self._type = u.pts_to
elif isinstance(u, SimTypeArray):
self._type = u.elem_type

@property
def type(self):
return self._type
Expand Down
35 changes: 20 additions & 15 deletions angr/analyses/typehoon/simple_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def solve(self):
subtypevars, supertypevars = self._calculate_closure(constraints)
self._find_recursive_types(subtypevars)
self._compute_lower_upper_bounds(subtypevars, supertypevars)
# self._unify_struct_fields()
self._lower_struct_fields()
# import pprint
# print("Lower bounds")
# pprint.pprint(self._lower_bounds)
Expand Down Expand Up @@ -372,30 +372,35 @@ def _compute_lower_upper_bounds(self, subtypevars, supertypevars):
subtype_infimum = self._join(subtypevar, typevar, translate=self._get_lower_bound)
self._lower_bounds[subtypevar] = subtype_infimum

def _unify_struct_fields(self):
def _lower_struct_fields(self):

for v, ptrv_subtype in self._lower_bounds.items():
if isinstance(v, DerivedTypeVariable) and isinstance(v.label, HasField):
# tv_680: ptr32(struct{0: int32})
# tv_680.load.<32>@0: ptr32(struct{5: int8})
# becomes
# tv_680: ptr32(struct{0: ptr32(struct{5: int8})})

for outer, outer_lb in self._lower_bounds.items():
if isinstance(outer, DerivedTypeVariable) and isinstance(outer.label, HasField):
# unpack outer (<base>.<label>.<HasField>) to get the base type variable
ptrv = v.type_var.type_var
base = outer.type_var.type_var

if ptrv in self._lower_bounds:
# unification
if base in self._lower_bounds:

v_subtype = self._lower_bounds[v]
base_lb = self._lower_bounds[base]

# make sure the lower bound of the base is a pointer to a struct before accessing the field at the offset that outer.label specifies
if isinstance(ptrv_subtype, Pointer):
if isinstance(ptrv_subtype.basetype, Struct):
the_field = ptrv_subtype.basetype.fields[v.label.offset]
new_field = self._join(the_field, v_subtype, translate=self._get_lower_bound)
if isinstance(base_lb, Pointer):
if isinstance(base_lb.basetype, Struct):
the_field = base_lb.basetype.fields[outer.label.offset]
# replace this field
new_field = self._meet(the_field, outer_lb, translate=self._get_upper_bound)
if new_field != the_field:
new_fields = ptrv_subtype.basetype.fields.copy()
new_fields = base_lb.basetype.fields.copy()
new_fields.update(
{v.label.offset: new_field,
{outer.label.offset: new_field,
}
)
self._lower_bounds[ptrv] = ptrv_subtype.__class__(Struct(new_fields))
self._lower_bounds[base] = base_lb.__class__(Struct(new_fields))

def _abstract(self, t): # pylint:disable=no-self-use
return t.__class__
Expand Down
34 changes: 22 additions & 12 deletions angr/analyses/typehoon/typehoon.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ class Typehoon(Analysis):
User may specify ground truth, which will override all types at certain program points during constraint solving.
"""
def __init__(self, constraints, ground_truth=None, var_mapping: Optional[Dict['SimVariable','TypeVariable']]=None,
def __init__(self, constraints, ground_truth=None,
var_mapping: Optional[Dict['SimVariable',Set['TypeVariable']]]=None,
prioritize_char_array_over_struct: bool=True,
must_struct: Optional[Set['TypeVariable']]=None,
):
Expand Down Expand Up @@ -63,16 +64,17 @@ def __init__(self, constraints, ground_truth=None, var_mapping: Optional[Dict['S
# Public methods
#

def update_variable_types(self, func_addr: Union[int,str], var_to_typevar):
def update_variable_types(self, func_addr: Union[int,str], var_to_typevars):

for var, typevar in var_to_typevar.items():
type_ = self.simtypes_solution.get(typevar, None)
if type_ is not None:
# print("{} -> {}: {}".format(var, typevar, type_))
name = None
if isinstance(type_, SimStruct):
name = type_.name
self.kb.variables[func_addr].set_variable_type(var, type_, name=name)
for var, typevars in var_to_typevars.items():
for typevar in typevars:
type_ = self.simtypes_solution.get(typevar, None)
if type_ is not None:
# print("{} -> {}: {}".format(var, typevar, type_))
name = None
if isinstance(type_, SimStruct):
name = type_.name
self.kb.variables[func_addr].set_variable_type(var, type_, name=name)

def pp_constraints(self) -> None:
"""
Expand All @@ -81,7 +83,11 @@ def pp_constraints(self) -> None:
if self._var_mapping is None:
raise ValueError("Variable mapping does not exist.")

typevar_to_var = dict((v, k) for k, v in self._var_mapping.items())
typevar_to_var = { }
for k, typevars in self._var_mapping.items():
for tv in typevars:
typevar_to_var[tv] = k

print("### {} constraints".format(len(self._constraints)))
for constraint in self._constraints:
print(" " + constraint.pp_str(typevar_to_var))
Expand All @@ -96,7 +102,11 @@ def pp_solution(self) -> None:
if self.solution is None:
raise RuntimeError("Please run type solver before calling pp_solution().")

typevar_to_var = dict((v, k) for k, v in self._var_mapping.items())
typevar_to_var = { }
for k, typevars in self._var_mapping.items():
for tv in typevars:
typevar_to_var[tv] = k

print("### {} solutions".format(len(self.solution)))
for typevar in sorted(self.solution.keys(), key=str):
sol = self.solution[typevar]
Expand Down
26 changes: 19 additions & 7 deletions angr/analyses/variable_recovery/engine_ail.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,17 @@ def _ail_handle_Load(self, expr):
addr_r = self._expr(expr.addr)
size = expr.size

return self._load(addr_r, size, expr=expr)
r = self._load(addr_r, size, expr=expr)
return r

def _ail_handle_Const(self, expr):
if self.project.loader.find_segment_containing(expr.value) is not None:
r = self._load_from_global(expr.value, 1, expr=expr)
ty = r.typevar
else:
ty = typeconsts.int_type(expr.size * self.state.arch.byte_width)
v = claripy.BVV(expr.value, expr.size * self.state.arch.byte_width)
r = RichR(v, typevar=typeconsts.int_type(expr.size * self.state.arch.byte_width))
r = RichR(v, typevar=ty)
self._reference(r, self._codeloc())
return r

Expand Down Expand Up @@ -276,14 +282,20 @@ def _ail_handle_Add(self, expr):
r1 = self._expr(arg1)

type_constraints = set()
if r0.typevar is not None and r1.data.concrete:
# addition with constants. create a derived type variable
typevar = typevars.DerivedTypeVariable(r0.typevar, typevars.AddN(r1.data._model_concrete.value))
if r0.typevar is not None:
r0_typevar = r0.typevar
else:
# create a new type variable and add constraints accordingly
r0_typevar = typevars.TypeVariable()

if r1.data.concrete:
# addition with constants. create a derived type variable
typevar = typevars.DerivedTypeVariable(r0_typevar, typevars.AddN(r1.data._model_concrete.value))
elif r1.typevar is not None:
typevar = typevars.TypeVariable()
if r0.typevar is not None and r1.typevar is not None:
type_constraints.add(typevars.Add(r0.typevar, r1.typevar, typevar))
type_constraints.add(typevars.Add(r0_typevar, r1.typevar, typevar))
else:
typevar = None

sum_ = None
if r0.data is not None and r1.data is not None:
Expand Down
53 changes: 28 additions & 25 deletions angr/analyses/variable_recovery/engine_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,41 +634,42 @@ def _load(self, richr_addr: RichR, size: int, expr=None):

elif addr.concrete:
# Loading data from memory
self._load_from_global(addr._model_concrete.value, size, expr=expr)
v = self._load_from_global(addr._model_concrete.value, size, expr=expr)
typevar = v.typevar

elif self._addr_has_concrete_base(addr) and self._parse_offseted_addr(addr) is not None:
# Loading data from a memory address with an offset
base_addr, offset, elem_size = self._parse_offseted_addr(addr)
self._load_from_global(base_addr._model_concrete.value, size, expr=expr, offset=offset,
elem_size=elem_size)
v = self._load_from_global(base_addr._model_concrete.value, size, expr=expr, offset=offset,
elem_size=elem_size)
typevar = v.typevar

# Loading data from a pointer
if richr_addr.type_constraints:
for tc in richr_addr.type_constraints:
self.state.add_type_constraint(tc)

# parse the loading offset
offset = 0
if (isinstance(richr_addr.typevar, typevars.DerivedTypeVariable) and
isinstance(richr_addr.typevar.label, typevars.AddN)):
offset = richr_addr.typevar.label.n
richr_addr_typevar = richr_addr.typevar.type_var # unpack
else:
# Loading data from a pointer
if richr_addr.type_constraints:
for tc in richr_addr.type_constraints:
self.state.add_type_constraint(tc)

# parse the loading offset
offset = 0
if (isinstance(richr_addr.typevar, typevars.DerivedTypeVariable) and
isinstance(richr_addr.typevar.label, typevars.AddN)):
offset = richr_addr.typevar.label.n
richr_addr_typevar = richr_addr.typevar.type_var # unpack
else:
richr_addr_typevar = richr_addr.typevar
richr_addr_typevar = richr_addr.typevar

if richr_addr_typevar is not None:
# create a type constraint
typevar = typevars.DerivedTypeVariable(
typevars.DerivedTypeVariable(richr_addr_typevar, typevars.Load()),
typevars.HasField(size * self.state.arch.byte_width, offset)
)
self.state.add_type_constraint(typevars.Existence(typevar))
if richr_addr_typevar is not None:
# create a type constraint
typevar = typevars.DerivedTypeVariable(
typevars.DerivedTypeVariable(richr_addr_typevar, typevars.Load()),
typevars.HasField(size * self.state.arch.byte_width, offset)
)
self.state.add_type_constraint(typevars.Existence(typevar))

return RichR(self.state.top(size * self.state.arch.byte_width), typevar=typevar)

def _load_from_global(self, addr: int, size, expr=None, offset: Optional[claripy.ast.BV]=None,
elem_size: Optional[claripy.ast.BV]=None):
elem_size: Optional[claripy.ast.BV]=None) -> RichR:

variable_manager = self.variable_manager['global']
if expr is None:
Expand All @@ -691,7 +692,7 @@ def _load_from_global(self, addr: int, size, expr=None, offset: Optional[claripy
if not existing_vars:
# is this address mapped?
if self.project.loader.find_object_containing(addr) is None:
return
return RichR(self.state.top(size * self.state.arch.byte_width))
variable = SimMemoryVariable(addr, size,
ident=variable_manager.next_variable_ident('global'),
)
Expand Down Expand Up @@ -734,6 +735,8 @@ def _load_from_global(self, addr: int, size, expr=None, offset: Optional[claripy
typevars.Existence(load_typevar)
)

return RichR(self.state.top(size * self.state.arch.byte_width), typevar=typevar)

def _read_from_register(self, offset, size, expr=None):
"""
Expand Down
19 changes: 14 additions & 5 deletions angr/analyses/variable_recovery/variable_recovery_fast.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pylint:disable=wrong-import-position,wrong-import-order
from typing import Optional, List, Tuple
import logging
from collections import defaultdict
Expand All @@ -10,10 +11,10 @@
from ...block import Block
from ...errors import AngrVariableRecoveryError, SimEngineError
from ...knowledge_plugins import Function
from ...sim_variable import SimStackVariable, SimRegisterVariable, SimVariable
from ...sim_variable import SimStackVariable, SimRegisterVariable, SimVariable, SimMemoryVariable
from ...engines.vex.claripy.irop import vexop_to_simop
from ..forward_analysis import ForwardAnalysis, FunctionGraphVisitor
from ..typehoon.typevars import Equivalence, TypeVariable, Subtype
from ..typehoon.typevars import Equivalence, TypeVariable
from .variable_recovery_base import VariableRecoveryBase, VariableRecoveryStateBase
from .engine_vex import SimEngineVRVEX
from .engine_ail import SimEngineVRAIL
Expand Down Expand Up @@ -141,7 +142,7 @@ def merge(self, others: Tuple['VariableRecoveryFastState'],
else:
typevar = TypeVariable()
for orig_typevar in all_typevars:
merged_typeconstraints.add(Subtype(orig_typevar, typevar))
merged_typeconstraints.add(Equivalence(orig_typevar, typevar))
stack_offset_typevars[offset] = typevar

# clean up
Expand Down Expand Up @@ -222,7 +223,7 @@ def __init__(self, func, func_graph=None, max_iterations=2, low_priority=False,
self._node_iterations = defaultdict(int)

self._node_to_cc = { }
self.var_to_typevar = { }
self.var_to_typevars = defaultdict(set)
self.type_constraints = None

self._analyze()
Expand Down Expand Up @@ -345,7 +346,8 @@ def _run_on_node(self, node, state):

self._node_iterations[node.addr] += 1
self.type_constraints |= state.type_constraints
self.var_to_typevar.update(state.typevars._typevars)
for var, typevar in state.typevars._typevars.items():
self.var_to_typevars[var].add(typevar)

state.downsize()
self._outstates[node.addr] = state
Expand All @@ -367,6 +369,13 @@ def _post_analysis(self):
state.downsize_region(state.stack_region),
)

# unify type variables for global variables
for var, typevars in self.var_to_typevars.items():
if len(typevars) > 1 and isinstance(var, SimMemoryVariable) and not isinstance(var, SimStackVariable):
sorted_typevars = list(sorted(typevars, key=lambda x: str(x))) # pylint:disable=unnecessary-lambda
for tv in sorted_typevars[1:]:
self.type_constraints.add(Equivalence(sorted_typevars[0], tv))

#
# Private methods
#
Expand Down

0 comments on commit d6711a9

Please sign in to comment.