Skip to content

Commit

Permalink
Java frontend: create synthetic static initialisers for stub globals
Browse files Browse the repository at this point in the history
Previously the Java frontend discovered static field references during method conversion, and when it
found them it created global symbols with nondet values, causing an object tree to be created in
__CPROVER_initialize. This caused two problems:
  (1) they were created late, meaning when incrementally loading functions, __CPROVER_initialize may
      already have been created when a new stub static field is discovered, and
  (2) by creating potentially large trees of potential objects in __CPROVER_initialize, symex would be
      compelled to accrue a lot of possibly-unused state.

This change moves the object tree creation into a synthetic static initialiser, which both defers executing
the initialisation until their class is actually used, and also creates the static initialisers before
method conversion, such that its already_run variable is mentioned in __CPROVER_initialize when it is
created (the initialize function is now both smaller and "right first time").
  • Loading branch information
smowton committed Feb 7, 2018
1 parent 150f826 commit e163ab6
Show file tree
Hide file tree
Showing 11 changed files with 424 additions and 116 deletions.
2 changes: 1 addition & 1 deletion regression/cbmc-java/generic_class_bound1/test.desc
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ Gn.class
.*file Gn.java line 9 function java::Gn.foo1:\(LGn;\)V bytecode-index 1 block 1: FAILED
.*file Gn.java line 10 function java::Gn.foo1:\(LGn;\)V bytecode-index 4 block 2: FAILED
.*file Gn.java line 11 function java::Gn.foo1:\(LGn;\)V bytecode-index 5 block 3: FAILED
.*file Gn.java line 13 function java::Gn.main:\(\[Ljava/lang/String;\)V bytecode-index 2 block 1: SATISFIED
.*file Gn.java line 13 function java::Gn.main:\(\[Ljava/lang/String;\)V bytecode-index 2 block 2: SATISFIED
--
4 changes: 4 additions & 0 deletions src/java_bytecode/java_bytecode_convert_class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,10 @@ void java_bytecode_convert_classt::convert(
new_symbol.name=id2string(class_symbol.name)+"."+id2string(f.name);
new_symbol.base_name=f.name;
new_symbol.type=field_type;
// Annotating the type with ID_C_class to provide a static field -> class
// link matches the method used by java_bytecode_convert_method::convert
// for methods.
new_symbol.type.set(ID_C_class, class_symbol.name);
new_symbol.pretty_name=id2string(class_symbol.pretty_name)+
"."+id2string(f.name);
new_symbol.mode=ID_java;
Expand Down
38 changes: 6 additions & 32 deletions src/java_bytecode/java_bytecode_convert_method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -906,34 +906,6 @@ static void gather_symbol_live_ranges(
}
}

/// See above
/// \par parameters: `se`: Symbol expression referring to a static field
/// `basename`: The static field's basename
/// \return Creates a symbol table entry for the static field if one doesn't
/// exist already.
void java_bytecode_convert_methodt::check_static_field_stub(
const symbol_exprt &symbol_expr,
const irep_idt &basename)
{
const auto &id=symbol_expr.get_identifier();
if(symbol_table.symbols.find(id)==symbol_table.symbols.end())
{
// Create a stub, to be overwritten if/when the real class is loaded.
symbolt new_symbol;
new_symbol.is_static_lifetime=true;
new_symbol.is_lvalue=true;
new_symbol.is_state_var=true;
new_symbol.name=id;
new_symbol.base_name=basename;
new_symbol.type=symbol_expr.type();
new_symbol.pretty_name=new_symbol.name;
new_symbol.mode=ID_java;
new_symbol.is_type=false;
new_symbol.value.make_nil();
symbol_table.add(new_symbol);
}
}

/// Each static access to classname should be prefixed with a check for
/// necessary static init; this returns a call implementing that check.
/// \param classname: Class name
Expand Down Expand Up @@ -2026,8 +1998,9 @@ codet java_bytecode_convert_methodt::convert_instructions(
field_name.find("$assertionsDisabled")!=std::string::npos;
symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name);

// If external, create a symbol table entry for this static field:
check_static_field_stub(symbol_expr, field_name);
INVARIANT(
symbol_table.has_symbol(symbol_expr.get_identifier()),
"getstatic symbol should have been created before method conversion");

if(needed_lazy_methods)
{
Expand Down Expand Up @@ -2081,8 +2054,9 @@ codet java_bytecode_convert_methodt::convert_instructions(
const auto &field_name=arg0.get_string(ID_component_name);
symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name);

// If external, create a symbol table entry for this static field:
check_static_field_stub(symbol_expr, field_name);
INVARIANT(
symbol_table.has_symbol(symbol_expr.get_identifier()),
"putstatic symbol should have been created before method conversion");

if(needed_lazy_methods && arg0.type().id() == ID_symbol)
{
Expand Down
4 changes: 0 additions & 4 deletions src/java_bytecode/java_bytecode_convert_method_class.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,6 @@ class java_bytecode_convert_methodt:public messaget

const bytecode_infot &get_bytecode_info(const irep_idt &statement);

void check_static_field_stub(
const symbol_exprt &se,
const irep_idt &basename);

bool class_needs_clinit(const irep_idt &classname);
exprt get_or_create_clinit_wrapper(const irep_idt &classname);
codet get_clinit_call(const irep_idt &classname);
Expand Down
156 changes: 134 additions & 22 deletions src/java_bytecode/java_bytecode_language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,8 @@ static void infer_opaque_type_fields(
namespacet ns(symbol_table);
for(const auto &method : parse_tree.parsed_class.methods)
{
for(const auto &instruction : method.instructions)
for(const java_bytecode_parse_treet::instructiont &instruction :
method.instructions)
{
if(instruction.statement == "getfield" ||
instruction.statement == "putfield")
Expand Down Expand Up @@ -359,7 +360,8 @@ static void generate_constant_global_variables(
{
for(auto &method : parse_tree.parsed_class.methods)
{
for(auto &instruction : method.instructions)
for(java_bytecode_parse_treet::instructiont &instruction :
method.instructions)
{
// ldc* instructions are Java bytecode "load constant" ops, which can
// retrieve a numeric constant, String literal, or Class literal.
Expand All @@ -381,6 +383,87 @@ static void generate_constant_global_variables(
}
}

/// Add a stub global symbol to the symbol table, initialising pointer-typed
/// symbols with null and primitive-typed ones with an arbitrary (nondet) value.
/// \param symbol_table: table to add to
/// \param symbol_id: new symbol fully-qualified identifier
/// \param symbol_basename: new symbol basename
/// \param symbol_type: new symbol type
/// \param class_id: class id that directly encloses this static field
static void create_stub_global_symbol(
symbol_table_baset &symbol_table,
const irep_idt &symbol_id,
const irep_idt &symbol_basename,
const typet &symbol_type,
const irep_idt &class_id)
{
symbolt new_symbol;
new_symbol.is_static_lifetime = true;
new_symbol.is_lvalue = true;
new_symbol.is_state_var = true;
new_symbol.name = symbol_id;
new_symbol.base_name = symbol_basename;
new_symbol.type = symbol_type;
new_symbol.type.set(ID_C_class, class_id);
new_symbol.pretty_name = new_symbol.name;
new_symbol.mode = ID_java;
new_symbol.is_type = false;
// If pointer-typed, initialise to null and a static initialiser will be
// created to initialise on first reference. If primitive-typed, specify
// nondeterministic initialisation by setting a nil value.
if(symbol_type.id() == ID_pointer)
new_symbol.value = null_pointer_exprt(to_pointer_type(symbol_type));
else
new_symbol.value.make_nil();
bool add_failed = symbol_table.add(new_symbol);
INVARIANT(
!add_failed, "caller should have checked symbol not already in table");
}

/// Search for getstatic and putstatic instructions in a class' bytecode and
/// create stub symbols for any static fields that aren't already in the symbol
/// table. The new symbols are null-initialised for reference-typed globals /
/// static fields, and nondet-initialised for primitives.
/// \param parse_tree: class bytecode
/// \param symbol_table: symbol table; may gain new symbols
static void create_stub_global_symbols(
const java_bytecode_parse_treet &parse_tree,
symbol_table_baset &symbol_table)
{
namespacet ns(symbol_table);
for(const auto &method : parse_tree.parsed_class.methods)
{
for(const java_bytecode_parse_treet::instructiont &instruction :
method.instructions)
{
if(instruction.statement == "getstatic" ||
instruction.statement == "putstatic")
{
INVARIANT(
instruction.args.size() > 0,
"get/putstatic should have at least one argument");
irep_idt component = instruction.args[0].get_string(ID_component_name);
INVARIANT(
!component.empty(), "get/putstatic should specify a component");
irep_idt class_id = instruction.args[0].get_string(ID_class);
INVARIANT(
!class_id.empty(), "get/putstatic should specify a class");
irep_idt identifier = id2string(class_id) + "." + id2string(component);

if(!symbol_table.has_symbol(identifier))
{
create_stub_global_symbol(
symbol_table,
identifier,
component,
instruction.args[0].type(),
class_id);
}
}
}
}
}

bool java_bytecode_languaget::typecheck(
symbol_tablet &symbol_table,
const std::string &module)
Expand Down Expand Up @@ -477,10 +560,31 @@ bool java_bytecode_languaget::typecheck(
<< " String or Class constant symbols"
<< messaget::eom;

// For each reference to a stub global (that is, a global variable declared on
// a class we don't have bytecode for, and therefore don't know the static
// initialiser for), create a synthetic static initialiser (clinit method)
// to nondet initialise it.
// Note this must be done before making static initialiser wrappers below, as
// this makes a Classname.clinit method, then the next pass makes a wrapper
// that ensures it is only run once, and that static initialisation happens
// in class-graph topological order.

{
journalling_symbol_tablet symbol_table_journal =
journalling_symbol_tablet::wrap(symbol_table);
for(const auto &c : java_class_loader.class_map)
{
create_stub_global_symbols(c.second, symbol_table_journal);
}

stub_global_initializer_factory.create_stub_global_initializer_symbols(
symbol_table, symbol_table_journal.get_inserted(), synthetic_methods);
}

// For each class that will require a static initializer wrapper, create a
// function named package.classname::clinit_wrapper, and a corresponding
// global tracking whether it has run or not:
create_static_initializer_wrappers(symbol_table);
create_static_initializer_wrappers(symbol_table, synthetic_methods);

// Now incrementally elaborate methods
// that are reachable from this entry point.
Expand All @@ -495,24 +599,12 @@ bool java_bytecode_languaget::typecheck(
journalling_symbol_tablet journalling_symbol_table =
journalling_symbol_tablet::wrap(symbol_table);

// Convert all synthetic methods:
for(const auto &function_id_and_type : synthetic_methods)
{
// Convert all static initialisers:
std::vector<irep_idt> static_initialisers;

// Careful not to add symbols while iterating over the symbol table!
for(const auto &symbol : symbol_table.symbols)
{
if(is_clinit_wrapper_function(symbol.second.name))
static_initialisers.push_back(symbol.second.name);
}

for(const auto &static_init_wrapper_name : static_initialisers)
{
convert_single_method(
static_init_wrapper_name, journalling_symbol_table);
}
convert_single_method(
function_id_and_type.first, journalling_symbol_table);
}

// Convert all methods for which we have bytecode now
for(const auto &method_sig : method_bytecode)
{
Expand Down Expand Up @@ -714,6 +806,8 @@ bool java_bytecode_languaget::convert_single_method(
// Get bytecode for specified function if we have it
method_bytecodet::opt_reft cmb = method_bytecode.get(function_id);

synthetic_methods_mapt::iterator synthetic_method_it;

// Check if have a string solver implementation
if(string_preprocess.implements_function(function_id))
{
Expand All @@ -735,11 +829,29 @@ bool java_bytecode_languaget::convert_single_method(
symbol.value = generated_code;
return false;
}
else if(is_clinit_wrapper_function(function_id))
else if(
(synthetic_method_it = synthetic_methods.find(function_id)) !=
synthetic_methods.end())
{
// Synthetic method (i.e. one generated by the Java frontend and which
// doesn't occur in the source bytecode):
symbolt &symbol = symbol_table.get_writeable_ref(function_id);
symbol.value = get_clinit_wrapper_body(function_id, symbol_table);
// Notify lazy methods of other static init functions called:
switch(synthetic_method_it->second)
{
case synthetic_method_typet::STATIC_INITIALIZER_WRAPPER:
symbol.value = get_clinit_wrapper_body(function_id, symbol_table);
break;
case synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER:
symbol.value =
stub_global_initializer_factory.get_stub_initializer_body(
function_id,
symbol_table,
object_factory_parameters,
get_pointer_type_selector());
break;
}
// Notify lazy methods of static calls made from the newly generated
// function:
notify_static_method_calls(symbol.value, needed_lazy_methods);
return false;
}
Expand Down
29 changes: 5 additions & 24 deletions src/java_bytecode/java_bytecode_language.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ Author: Daniel Kroening, [email protected]
#include "ci_lazy_methods.h"
#include "ci_lazy_methods_needed.h"
#include "java_class_loader.h"
#include "java_static_initializers.h"
#include "java_string_library_preprocess.h"
#include "object_factory_parameters.h"
#include "synthetic_methods_map.h"

#include <java_bytecode/select_pointer_type.h>

Expand Down Expand Up @@ -53,10 +56,6 @@ Author: Daniel Kroening, [email protected]
" the purpose of lazy method loading\n" /* NOLINT(*) */ \
" A '.*' wildcard is allowed to specify all class members\n"

#define MAX_NONDET_ARRAY_LENGTH_DEFAULT 5
#define MAX_NONDET_STRING_LENGTH std::numeric_limits<std::int32_t>::max()
#define MAX_NONDET_TREE_DEPTH 5

class symbolt;

enum lazy_methods_modet
Expand All @@ -66,26 +65,6 @@ enum lazy_methods_modet
LAZY_METHODS_MODE_CONTEXT_SENSITIVE
};

struct object_factory_parameterst final
{
/// Maximum value for the non-deterministically-chosen length of an array.
size_t max_nondet_array_length=MAX_NONDET_ARRAY_LENGTH_DEFAULT;

/// Maximum value for the non-deterministically-chosen length of a string.
size_t max_nondet_string_length=MAX_NONDET_STRING_LENGTH;

/// Maximum depth for object hierarchy on input.
/// Used to prevent object factory to loop infinitely during the
/// generation of code that allocates/initializes data structures of recursive
/// data types or unbounded depth. We bound the maximum number of times we
/// dereference a pointer using a 'depth counter'. We set a pointer to null if
/// such depth becomes >= than this maximum value.
size_t max_nondet_tree_depth=MAX_NONDET_TREE_DEPTH;

/// Force string content to be ASCII printable characters when set to true.
bool string_printable = false;
};

class java_bytecode_languaget:public languaget
{
public:
Expand Down Expand Up @@ -192,6 +171,8 @@ class java_bytecode_languaget:public languaget

private:
const std::unique_ptr<const select_pointer_typet> pointer_type_selector;
synthetic_methods_mapt synthetic_methods;
stub_global_initializer_factoryt stub_global_initializer_factory;
};

std::unique_ptr<languaget> new_java_bytecode_language();
Expand Down
13 changes: 0 additions & 13 deletions src/java_bytecode/java_entry_point.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,6 @@ static bool should_init_symbol(const symbolt &sym)
return is_java_string_literal_id(sym.name);
}

static bool is_non_null_library_global(const irep_idt &symbolid)
{
static const std::unordered_set<irep_idt, irep_id_hash> non_null_globals=
{
"java::java.lang.System.out",
"java::java.lang.System.err",
"java::java.lang.System.in"
};
return non_null_globals.count(symbolid);
}

static void java_static_lifetime_init(
symbol_table_baset &symbol_table,
const source_locationt &source_location,
Expand Down Expand Up @@ -129,8 +118,6 @@ static void java_static_lifetime_init(
allow_null=false;
if(allow_null && is_java_string_literal_id(nameid))
allow_null=false;
if(allow_null && is_non_null_library_global(nameid))
allow_null=false;
}
gen_nondet_init(
sym.symbol_expr(),
Expand Down
Loading

0 comments on commit e163ab6

Please sign in to comment.