Skip to content

Commit

Permalink
Merge pull request diffblue#1644 from NathanJPhillips/feature/string-…
Browse files Browse the repository at this point in the history
…functions-on-demand

Load string functions on demand
  • Loading branch information
smowton authored Dec 7, 2017
2 parents 9b1ef1a + ea7646b commit a3e19f7
Show file tree
Hide file tree
Showing 14 changed files with 464 additions and 310 deletions.
100 changes: 51 additions & 49 deletions src/java_bytecode/ci_lazy_methods.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include <java_bytecode/java_entry_point.h>
#include <java_bytecode/java_class_loader.h>
#include <java_bytecode/java_utils.h>
#include <util/safe_pointer.h>
#include <util/suffix.h>
#include <java_bytecode/java_string_library_preprocess.h>

Expand Down Expand Up @@ -61,14 +60,14 @@ ci_lazy_methodst::ci_lazy_methodst(
/// from the main entry point (usually provided with the --function command-
/// line option)
/// \param symbol_table: global symbol table
/// \param [out] lazy_methods: map from method names to relevant symbol and
/// \param [out] method_bytecode: map from method names to relevant symbol and
/// parsed-method objects.
/// \param method_converter: Function for converting methods on demand.
/// \return Returns false on success
bool ci_lazy_methodst::operator()(
symbol_tablet &symbol_table,
lazy_methodst &lazy_methods,
method_convertert method_converter)
method_bytecodet &method_bytecode,
const method_convertert &method_converter)
{
std::vector<irep_idt> method_worklist1;
std::vector<irep_idt> method_worklist2;
Expand Down Expand Up @@ -141,21 +140,17 @@ bool ci_lazy_methodst::operator()(
{
if(!methods_already_populated.insert(mname).second)
continue;
auto findit=lazy_methods.find(mname);
if(findit==lazy_methods.end())
debug() << "CI lazy methods: elaborate " << mname << eom;
if(
method_converter(
mname,
// Note this wraps *references* to method_worklist2 & needed_classes
ci_lazy_methods_neededt(
method_worklist2, needed_classes, symbol_table)))
{
debug() << "Skip " << mname << eom;
// Couldn't convert this function
continue;
}
debug() << "CI lazy methods: elaborate " << mname << eom;
const auto &parsed_method=findit->second;
// Note this wraps *references* to method_worklist2, needed_classes:
ci_lazy_methods_neededt new_lazy_methods(
method_worklist2,
needed_classes,
symbol_table);
method_converter(
*parsed_method.first, *parsed_method.second, new_lazy_methods);
gather_virtual_callsites(
symbol_table.lookup_ref(mname).value,
virtual_callsites);
Expand Down Expand Up @@ -189,15 +184,23 @@ bool ci_lazy_methodst::operator()(

for(const auto &sym : symbol_table.symbols)
{
// Don't keep global variables (unless they're gathered below from a
// function that references them)
if(sym.second.is_static_lifetime)
continue;
if(lazy_methods.count(sym.first) &&
!methods_already_populated.count(sym.first))
{
continue;
}
if(sym.second.type.id()==ID_code)
{
// Don't keep functions that belong to this language that we haven't
// converted above
if(
method_bytecode.contains_method(sym.first) &&
!methods_already_populated.count(sym.first))
{
continue;
}
// If this is a function then add all the things used in it
gather_needed_globals(sym.second.value, symbol_table, keep_symbols);
}
keep_symbols.add(sym.second);
}

Expand Down Expand Up @@ -263,13 +266,13 @@ void ci_lazy_methodst::resolve_method_names(
/// \param entry_points: list of fully-qualified function names that
/// we should assume are reachable
/// \param ns: global namespace
/// \param [out] lazy_methods: Populated with all Java reference types whose
/// references may be passed, directly or indirectly, to any of the functions
/// in `entry_points`.
/// \param [out] needed_lazy_methods: Populated with all Java reference types
/// whose references may be passed, directly or indirectly, to any of the
/// functions in `entry_points`.
void ci_lazy_methodst::initialize_needed_classes(
const std::vector<irep_idt> &entry_points,
const namespacet &ns,
ci_lazy_methods_neededt &lazy_methods)
ci_lazy_methods_neededt &needed_lazy_methods)
{
for(const auto &mname : entry_points)
{
Expand All @@ -281,67 +284,66 @@ void ci_lazy_methodst::initialize_needed_classes(
{
const pointer_typet &original_pointer=to_pointer_type(param.type());
initialize_all_needed_classes_from_pointer(
original_pointer, ns, lazy_methods);
original_pointer, ns, needed_lazy_methods);
}
}
}

// Also add classes whose instances are magically
// created by the JVM and so won't be spotted by
// looking for constructors and calls as usual:
lazy_methods.add_needed_class("java::java.lang.String");
lazy_methods.add_needed_class("java::java.lang.Class");
lazy_methods.add_needed_class("java::java.lang.Object");
needed_lazy_methods.add_needed_class("java::java.lang.String");
needed_lazy_methods.add_needed_class("java::java.lang.Class");
needed_lazy_methods.add_needed_class("java::java.lang.Object");

// As in class_loader, ensure these classes stay available
for(const auto &id : extra_needed_classes)
lazy_methods.add_needed_class("java::" + id2string(id));
needed_lazy_methods.add_needed_class("java::" + id2string(id));
}

/// Build up list of methods for types for a pointer and any types it
/// might be substituted for. See
/// `initialize_needed_classes` for more details.
/// \param pointer_type: The type to gather methods for.
/// \param ns: global namespace
/// \param [out] lazy_methods: Populated with all Java reference types whose
/// references may be passed, directly or indirectly, to any of the functions
/// in `entry_points
/// \param [out] needed_lazy_methods: Populated with all Java reference types
/// whose references may be passed, directly or indirectly, to any of the
/// functions in `entry_points`
void ci_lazy_methodst::initialize_all_needed_classes_from_pointer(
const pointer_typet &pointer_type,
const namespacet &ns,
ci_lazy_methods_neededt &lazy_methods)
ci_lazy_methods_neededt &needed_lazy_methods)
{
initialize_needed_classes_from_pointer(
pointer_type, ns, lazy_methods);
initialize_needed_classes_from_pointer(pointer_type, ns, needed_lazy_methods);

const pointer_typet &subbed_pointer_type=
pointer_type_selector.convert_pointer_type(pointer_type, ns);

if(subbed_pointer_type!=pointer_type)
{
initialize_needed_classes_from_pointer(
subbed_pointer_type, ns, lazy_methods);
subbed_pointer_type, ns, needed_lazy_methods);
}
}

/// Build up list of methods for types for a specific pointer type. See
/// `initialize_needed_classes` for more details.
/// \param pointer_type: The type to gather methods for.
/// \param ns: global namespace
/// \param [out] lazy_methods: Populated with all Java reference types whose
/// references may be passed, directly or indirectly, to any of the functions
/// in `entry_points
/// \param [out] needed_lazy_methods: Populated with all Java reference types
/// whose references may be passed, directly or indirectly, to any of the
/// functions in `entry_points`
void ci_lazy_methodst::initialize_needed_classes_from_pointer(
const pointer_typet &pointer_type,
const namespacet &ns,
ci_lazy_methods_neededt &lazy_methods)
ci_lazy_methods_neededt &needed_lazy_methods)
{
const symbol_typet &class_type=to_symbol_type(pointer_type.subtype());
const auto &param_classid=class_type.get_identifier();

if(lazy_methods.add_needed_class(param_classid))
if(needed_lazy_methods.add_needed_class(param_classid))
{
gather_field_types(pointer_type.subtype(), ns, lazy_methods);
gather_field_types(pointer_type.subtype(), ns, needed_lazy_methods);
}
}

Expand Down Expand Up @@ -462,30 +464,30 @@ void ci_lazy_methodst::gather_needed_globals(
gather_needed_globals(*opit, symbol_table, needed);
}

/// See param lazy_methods
/// See param needed_lazy_methods
/// \param class_type: root of class tree to search
/// \param ns: global namespace
/// \param [out] lazy_methods: Popualted with all Java reference types reachable
/// starting at `class_type`. For example if `class_type` is
/// \param [out] needed_lazy_methods: Populated with all Java reference types
/// reachable starting at `class_type`. For example if `class_type` is
/// `symbol_typet("java::A")` and A has a B field, then `B` (but not `A`) will
/// be noted as a needed class.
void ci_lazy_methodst::gather_field_types(
const typet &class_type,
const namespacet &ns,
ci_lazy_methods_neededt &lazy_methods)
ci_lazy_methods_neededt &needed_lazy_methods)
{
const auto &underlying_type=to_struct_type(ns.follow(class_type));
for(const auto &field : underlying_type.components())
{
if(field.type().id()==ID_struct || field.type().id()==ID_symbol)
gather_field_types(field.type(), ns, lazy_methods);
gather_field_types(field.type(), ns, needed_lazy_methods);
else if(field.type().id()==ID_pointer)
{
// Skip array primitive pointers, for example:
if(field.type().subtype().id()!=ID_symbol)
continue;
initialize_all_needed_classes_from_pointer(
to_pointer_type(field.type()), ns, lazy_methods);
to_pointer_type(field.type()), ns, needed_lazy_methods);
}
}
}
Expand Down
84 changes: 67 additions & 17 deletions src/java_bytecode/ci_lazy_methods.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,67 @@

class java_string_library_preprocesst;

typedef std::pair<
const symbolt *,
const java_bytecode_parse_treet::methodt *>
lazy_method_valuet;
// Map from method id to class_method_and_bytecodet
class method_bytecodet
{
public:
/// Record describing one method: the id of a class, the id of the method and
/// a reference to the parsed method (three fields, not a pair).
struct class_method_and_bytecodet
{
// Id of a class; presumably the class that declares the method -- TODO
// confirm against the code that populates this record
irep_idt class_id;
// Id of the method; method_bytecodet::add uses it as the key of the map
irep_idt method_id;
// Held by reference rather than copied, so the referenced methodt has to
// outlive every copy of this record
const java_bytecode_parse_treet::methodt &method;
};

typedef std::map<irep_idt, lazy_method_valuet>
lazy_methodst;
typedef optionalt<std::reference_wrapper<const class_method_and_bytecodet>>
opt_reft;

private:
typedef std::map<irep_idt, class_method_and_bytecodet> mapt;
mapt map;

public:
/// Test whether an entry has been stored for a method.
/// \param method_id: id of the method to look for
/// \return true if the map holds an entry keyed by `method_id`
bool contains_method(const irep_idt &method_id) const
{
  const bool is_known = map.find(method_id) != map.end();
  return is_known;
}

void add(const class_method_and_bytecodet &method_class_and_bytecode)
{
map.emplace(
std::make_pair(
method_class_and_bytecode.method_id, method_class_and_bytecode));
}

/// Convenience overload: bundle the three values into a
/// class_method_and_bytecodet and store it via the single-argument `add`.
/// \param class_id: value for the record's `class_id` field
/// \param method_id: value for the record's `method_id` field (the map key)
/// \param method: parsed method the record will refer to (kept by reference)
void add(
  const irep_idt &class_id,
  const irep_idt &method_id,
  const java_bytecode_parse_treet::methodt &method)
{
  const class_method_and_bytecodet entry{class_id, method_id, method};
  add(entry);
}

/// \return const iterator to the first (method id, record) pair of the map
mapt::const_iterator begin() const
{
  return map.cbegin();
}
/// \return const past-the-end iterator of the map
mapt::const_iterator end() const
{
  return map.cend();
}

/// Look up the record stored for a method.
/// Declared const, like `contains_method`, `begin` and `end`, because the
/// lookup never modifies the map; this also makes it callable through a const
/// method_bytecodet.
/// \param method_id: id of the method to look up
/// \return A default-constructed `opt_reft` if the map has no entry keyed by
///   `method_id`; otherwise an `opt_reft` wrapping a const reference to the
///   record stored in this object's map.
opt_reft get(const irep_idt &method_id) const
{
  const auto it = map.find(method_id);
  if(it == map.end())
    return opt_reft();
  // std::cref wraps a reference to the stored record; nothing is copied
  return std::cref(it->second);
}
};

typedef std::function<void(
const symbolt &,
const java_bytecode_parse_treet::methodt &,
ci_lazy_methods_neededt)> method_convertert;
typedef std::function<
bool(const irep_idt &function_id, ci_lazy_methods_neededt)>
method_convertert;

class ci_lazy_methodst:public messaget
{
Expand All @@ -55,8 +104,8 @@ class ci_lazy_methodst:public messaget
// not const since messaget
bool operator()(
symbol_tablet &symbol_table,
lazy_methodst &lazy_methods,
method_convertert method_converter);
method_bytecodet &method_bytecode,
const method_convertert &method_converter);

private:
void resolve_method_names(
Expand All @@ -66,17 +115,17 @@ class ci_lazy_methodst:public messaget
void initialize_needed_classes(
const std::vector<irep_idt> &entry_points,
const namespacet &ns,
ci_lazy_methods_neededt &lazy_methods);
ci_lazy_methods_neededt &needed_lazy_methods);

void initialize_all_needed_classes_from_pointer(
const pointer_typet &pointer_type,
const namespacet &ns,
ci_lazy_methods_neededt &lazy_methods);
ci_lazy_methods_neededt &needed_lazy_methods);

void initialize_needed_classes_from_pointer(
const pointer_typet &pointer_type,
const namespacet &ns,
ci_lazy_methods_neededt &lazy_methods);
ci_lazy_methods_neededt &needed_lazy_methods);

void gather_virtual_callsites(
const exprt &e,
Expand All @@ -93,9 +142,10 @@ class ci_lazy_methodst:public messaget
const symbol_tablet &symbol_table,
symbol_tablet &needed);

void gather_field_types(const typet &class_type,
void gather_field_types(
const typet &class_type,
const namespacet &ns,
ci_lazy_methods_neededt &lazy_methods);
ci_lazy_methods_neededt &needed_lazy_methods);

irep_idt get_virtual_method_target(
const std::set<irep_idt> &needed_classes,
Expand Down
Loading

0 comments on commit a3e19f7

Please sign in to comment.