diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 14e51502868..4c108ba51b0 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -61,14 +60,14 @@ ci_lazy_methodst::ci_lazy_methodst( /// from the main entry point (usually provided with the --function command- /// line option /// \param symbol_table: global symbol table -/// \param [out] lazy_methods: map from method names to relevant symbol and +/// \param [out] method_bytecode: map from method names to relevant symbol and /// parsed-method objects. /// \param method_converter: Function for converting methods on demand. /// \return Returns false on success bool ci_lazy_methodst::operator()( symbol_tablet &symbol_table, - lazy_methodst &lazy_methods, - method_convertert method_converter) + method_bytecodet &method_bytecode, + const method_convertert &method_converter) { std::vector method_worklist1; std::vector method_worklist2; @@ -141,21 +140,17 @@ bool ci_lazy_methodst::operator()( { if(!methods_already_populated.insert(mname).second) continue; - auto findit=lazy_methods.find(mname); - if(findit==lazy_methods.end()) + debug() << "CI lazy methods: elaborate " << mname << eom; + if( + method_converter( + mname, + // Note this wraps *references* to method_worklist2 & needed_classes + ci_lazy_methods_neededt( + method_worklist2, needed_classes, symbol_table))) { - debug() << "Skip " << mname << eom; + // Couldn't convert this function continue; } - debug() << "CI lazy methods: elaborate " << mname << eom; - const auto &parsed_method=findit->second; - // Note this wraps *references* to method_worklist2, needed_classes: - ci_lazy_methods_neededt new_lazy_methods( - method_worklist2, - needed_classes, - symbol_table); - method_converter( - *parsed_method.first, *parsed_method.second, new_lazy_methods); gather_virtual_callsites( symbol_table.lookup_ref(mname).value, virtual_callsites); @@ -189,15 +184,23 @@ bool ci_lazy_methodst::operator()( for(const auto &sym : symbol_table.symbols) { + // Don't keep global variables (unless they're gathered below from a + // function that references them) if(sym.second.is_static_lifetime) continue; - if(lazy_methods.count(sym.first) && - !methods_already_populated.count(sym.first)) - { - continue; - } if(sym.second.type.id()==ID_code) + { + // Don't keep functions that belong to this language that we haven't + // converted above + if( + method_bytecode.contains_method(sym.first) && + !methods_already_populated.count(sym.first)) + { + continue; + } + // If this is a function then add all the things used in it gather_needed_globals(sym.second.value, symbol_table, keep_symbols); + } keep_symbols.add(sym.second); } @@ -263,13 +266,13 @@ void ci_lazy_methodst::resolve_method_names( /// \param entry_points: list of fully-qualified function names that /// we should assume are reachable /// \param ns: global namespace -/// \param [out] lazy_methods: Populated with all Java reference types whose -/// references may be passed, directly or indirectly, to any of the functions -/// in `entry_points`. +/// \param [out] needed_lazy_methods: Populated with all Java reference types +/// whose references may be passed, directly or indirectly, to any of the +/// functions in `entry_points`. void ci_lazy_methodst::initialize_needed_classes( const std::vector &entry_points, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods) + ci_lazy_methods_neededt &needed_lazy_methods) { for(const auto &mname : entry_points) { @@ -281,7 +284,7 @@ void ci_lazy_methodst::initialize_needed_classes( { const pointer_typet &original_pointer=to_pointer_type(param.type()); initialize_all_needed_classes_from_pointer( - original_pointer, ns, lazy_methods); + original_pointer, ns, needed_lazy_methods); } } } @@ -289,13 +292,13 @@ void ci_lazy_methodst::initialize_needed_classes( // Also add classes whose instances are magically // created by the JVM and so won't be spotted by // looking for constructors and calls as usual: - lazy_methods.add_needed_class("java::java.lang.String"); - lazy_methods.add_needed_class("java::java.lang.Class"); - lazy_methods.add_needed_class("java::java.lang.Object"); + needed_lazy_methods.add_needed_class("java::java.lang.String"); + needed_lazy_methods.add_needed_class("java::java.lang.Class"); + needed_lazy_methods.add_needed_class("java::java.lang.Object"); // As in class_loader, ensure these classes stay available for(const auto &id : extra_needed_classes) - lazy_methods.add_needed_class("java::" + id2string(id)); + needed_lazy_methods.add_needed_class("java::" + id2string(id)); } /// Build up list of methods for types for a pointer and any types it @@ -303,16 +306,15 @@ void ci_lazy_methodst::initialize_needed_classes( /// `initialize_needed_classes` for more details. /// \param pointer_type: The type to gather methods for. /// \param ns: global namespace -/// \param [out] lazy_methods: Populated with all Java reference types whose -/// references may be passed, directly or indirectly, to any of the functions -/// in `entry_points +/// \param [out] needed_lazy_methods: Populated with all Java reference types +/// whose references may be passed, directly or indirectly, to any of the +/// functions in `entry_points` void ci_lazy_methodst::initialize_all_needed_classes_from_pointer( const pointer_typet &pointer_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods) + ci_lazy_methods_neededt &needed_lazy_methods) { - initialize_needed_classes_from_pointer( - pointer_type, ns, lazy_methods); + initialize_needed_classes_from_pointer(pointer_type, ns, needed_lazy_methods); const pointer_typet &subbed_pointer_type= pointer_type_selector.convert_pointer_type(pointer_type, ns); @@ -320,7 +322,7 @@ void ci_lazy_methodst::initialize_all_needed_classes_from_pointer( if(subbed_pointer_type!=pointer_type) { initialize_needed_classes_from_pointer( - subbed_pointer_type, ns, lazy_methods); + subbed_pointer_type, ns, needed_lazy_methods); } } @@ -328,20 +330,20 @@ void ci_lazy_methodst::initialize_all_needed_classes_from_pointer( /// `initialize_needed_classes` for more details. /// \param pointer_type: The type to gather methods for. /// \param ns: global namespace -/// \param [out] lazy_methods: Populated with all Java reference types whose -/// references may be passed, directly or indirectly, to any of the functions -/// in `entry_points +/// \param [out] needed_lazy_methods: Populated with all Java reference types +/// whose references may be passed, directly or indirectly, to any of the +/// functions in `entry_points` void ci_lazy_methodst::initialize_needed_classes_from_pointer( const pointer_typet &pointer_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods) + ci_lazy_methods_neededt &needed_lazy_methods) { const symbol_typet &class_type=to_symbol_type(pointer_type.subtype()); const auto ¶m_classid=class_type.get_identifier(); - if(lazy_methods.add_needed_class(param_classid)) + if(needed_lazy_methods.add_needed_class(param_classid)) { - gather_field_types(pointer_type.subtype(), ns, lazy_methods); + gather_field_types(pointer_type.subtype(), ns, needed_lazy_methods); } } @@ -462,30 +464,30 @@ void ci_lazy_methodst::gather_needed_globals( gather_needed_globals(*opit, symbol_table, needed); } -/// See param lazy_methods +/// See param needed_lazy_methods /// \param class_type: root of class tree to search /// \param ns: global namespace -/// \param [out] lazy_methods: Popualted with all Java reference types reachable -/// starting at `class_type`. For example if `class_type` is +/// \param [out] needed_lazy_methods: Popualted with all Java reference types +/// reachable starting at `class_type`. For example if `class_type` is /// `symbol_typet("java::A")` and A has a B field, then `B` (but not `A`) will /// noted as a needed class. void ci_lazy_methodst::gather_field_types( const typet &class_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods) + ci_lazy_methods_neededt &needed_lazy_methods) { const auto &underlying_type=to_struct_type(ns.follow(class_type)); for(const auto &field : underlying_type.components()) { if(field.type().id()==ID_struct || field.type().id()==ID_symbol) - gather_field_types(field.type(), ns, lazy_methods); + gather_field_types(field.type(), ns, needed_lazy_methods); else if(field.type().id()==ID_pointer) { // Skip array primitive pointers, for example: if(field.type().subtype().id()!=ID_symbol) continue; initialize_all_needed_classes_from_pointer( - to_pointer_type(field.type()), ns, lazy_methods); + to_pointer_type(field.type()), ns, needed_lazy_methods); } } } diff --git a/src/java_bytecode/ci_lazy_methods.h b/src/java_bytecode/ci_lazy_methods.h index 5c5d2aca51c..9b9b7dda3c4 100644 --- a/src/java_bytecode/ci_lazy_methods.h +++ b/src/java_bytecode/ci_lazy_methods.h @@ -26,18 +26,67 @@ class java_string_library_preprocesst; -typedef std::pair< - const symbolt *, - const java_bytecode_parse_treet::methodt *> - lazy_method_valuet; +// Map from method id to class_method_and_bytecodet +class method_bytecodet +{ +public: + /// Pair of class id and methodt + struct class_method_and_bytecodet + { + irep_idt class_id; + irep_idt method_id; + const java_bytecode_parse_treet::methodt &method; + }; -typedef std::map - lazy_methodst; + typedef optionalt> + opt_reft; + +private: + typedef std::map mapt; + mapt map; + +public: + bool contains_method(const irep_idt &method_id) const + { + return map.count(method_id) != 0; + } + + void add(const class_method_and_bytecodet &method_class_and_bytecode) + { + map.emplace( + std::make_pair( + method_class_and_bytecode.method_id, method_class_and_bytecode)); + } + + void add( + const irep_idt &class_id, + const irep_idt &method_id, + const java_bytecode_parse_treet::methodt &method) + { + add(class_method_and_bytecodet{class_id, method_id, method}); + } + + mapt::const_iterator begin() const + { + return map.begin(); + } + mapt::const_iterator end() const + { + return map.end(); + } + + opt_reft get(const irep_idt &method_id) + { + const auto it = map.find(method_id); + if(it == map.end()) + return opt_reft(); + return std::cref(it->second); + } +}; -typedef std::function method_convertert; +typedef std::function< + bool(const irep_idt &function_id, ci_lazy_methods_neededt)> + method_convertert; class ci_lazy_methodst:public messaget { @@ -55,8 +104,8 @@ class ci_lazy_methodst:public messaget // not const since messaget bool operator()( symbol_tablet &symbol_table, - lazy_methodst &lazy_methods, - method_convertert method_converter); + method_bytecodet &method_bytecode, + const method_convertert &method_converter); private: void resolve_method_names( @@ -66,17 +115,17 @@ class ci_lazy_methodst:public messaget void initialize_needed_classes( const std::vector &entry_points, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods); + ci_lazy_methods_neededt &needed_lazy_methods); void initialize_all_needed_classes_from_pointer( const pointer_typet &pointer_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods); + ci_lazy_methods_neededt &needed_lazy_methods); void initialize_needed_classes_from_pointer( const pointer_typet &pointer_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods); + ci_lazy_methods_neededt &needed_lazy_methods); void gather_virtual_callsites( const exprt &e, @@ -93,9 +142,10 @@ class ci_lazy_methodst:public messaget const symbol_tablet &symbol_table, symbol_tablet &needed); - void gather_field_types(const typet &class_type, + void gather_field_types( + const typet &class_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods); + ci_lazy_methods_neededt &needed_lazy_methods); irep_idt get_virtual_method_target( const std::set &needed_classes, diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 4c2e640ff9e..e6302f82dcd 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -35,15 +35,15 @@ class java_bytecode_convert_classt:public messaget symbol_tablet &_symbol_table, message_handlert &_message_handler, size_t _max_array_length, - lazy_methodst& _lazy_methods, + method_bytecodet &method_bytecode, lazy_methods_modet _lazy_methods_mode, - java_string_library_preprocesst &_string_preprocess): - messaget(_message_handler), - symbol_table(_symbol_table), - max_array_length(_max_array_length), - lazy_methods(_lazy_methods), - lazy_methods_mode(_lazy_methods_mode), - string_preprocess(_string_preprocess) + java_string_library_preprocesst &_string_preprocess) + : messaget(_message_handler), + symbol_table(_symbol_table), + max_array_length(_max_array_length), + method_bytecode(method_bytecode), + lazy_methods_mode(_lazy_methods_mode), + string_preprocess(_string_preprocess) { } @@ -73,7 +73,7 @@ class java_bytecode_convert_classt:public messaget protected: symbol_tablet &symbol_table; const size_t max_array_length; - lazy_methodst &lazy_methods; + method_bytecodet &method_bytecode; lazy_methods_modet lazy_methods_mode; java_string_library_preprocesst &string_preprocess; @@ -205,7 +205,7 @@ void java_bytecode_convert_classt::convert(const classt &c) method, symbol_table, get_message_handler()); - lazy_methods[method_identifier]=std::make_pair(class_symbol, &method); + method_bytecode.add(qualified_classname, method_identifier, method); } // is this a root class? @@ -483,7 +483,7 @@ bool java_bytecode_convert_class( symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, - lazy_methodst &lazy_methods, + method_bytecodet &method_bytecode, lazy_methods_modet lazy_methods_mode, java_string_library_preprocesst &string_preprocess) { @@ -491,7 +491,7 @@ bool java_bytecode_convert_class( symbol_table, message_handler, max_array_length, - lazy_methods, + method_bytecode, lazy_methods_mode, string_preprocess); diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index 6f87db44baa..a4efa307641 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -24,7 +24,7 @@ bool java_bytecode_convert_class( symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, - lazy_methodst &, + method_bytecodet &, lazy_methods_modet, java_string_library_preprocesst &string_preprocess); diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 07627b0d062..ac6bceab861 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -79,10 +79,10 @@ class patternt /// \return Assigns parameter names (side-effects on `ftype`) to function stub /// parameters, which are initially nameless as method conversion hasn't /// happened. Also creates symbols in `symbol_table`. -void assign_parameter_names( +static void assign_parameter_names( code_typet &ftype, const irep_idt &name_prefix, - symbol_tablet &symbol_table) + symbol_table_baset &symbol_table) { code_typet::parameterst ¶meters=ftype.parameters(); @@ -1429,8 +1429,8 @@ codet java_bytecode_convert_methodt::convert_instructions( if(as_string(arg0.get(ID_identifier)) .find("")!=std::string::npos) { - if(lazy_methods) - lazy_methods->add_needed_class(classname); + if(needed_lazy_methods) + needed_lazy_methods->add_needed_class(classname); code_type.set(ID_constructor, true); } else @@ -1548,11 +1548,11 @@ codet java_bytecode_convert_methodt::convert_instructions( { // static binding call.function()=symbol_exprt(arg0.get(ID_identifier), arg0.type()); - if(lazy_methods) + if(needed_lazy_methods) { - lazy_methods->add_needed_method(arg0.get(ID_identifier)); + needed_lazy_methods->add_needed_method(arg0.get(ID_identifier)); // Calling a static method causes static initialization: - lazy_methods->add_needed_class(arg0.get(ID_C_class)); + needed_lazy_methods->add_needed_class(arg0.get(ID_C_class)); } } @@ -2180,11 +2180,11 @@ codet java_bytecode_convert_methodt::convert_instructions( // If external, create a symbol table entry for this static field: check_static_field_stub(symbol_expr, field_name); - if(lazy_methods) + if(needed_lazy_methods) { if(arg0.type().id()==ID_symbol) { - lazy_methods->add_needed_class( + needed_lazy_methods->add_needed_class( to_symbol_type(arg0.type()).get_identifier()); } else if(arg0.type().id()==ID_pointer) @@ -2192,13 +2192,13 @@ codet java_bytecode_convert_methodt::convert_instructions( const auto &pointer_type=to_pointer_type(arg0.type()); if(pointer_type.subtype().id()==ID_symbol) { - lazy_methods->add_needed_class( + needed_lazy_methods->add_needed_class( to_symbol_type(pointer_type.subtype()).get_identifier()); } } else if(is_assertions_disabled_field) { - lazy_methods->add_needed_class(arg0.get_string(ID_class)); + needed_lazy_methods->add_needed_class(arg0.get_string(ID_class)); } } results[0]=java_bytecode_promotion(symbol_expr); @@ -2235,9 +2235,9 @@ codet java_bytecode_convert_methodt::convert_instructions( // If external, create a symbol table entry for this static field: check_static_field_stub(symbol_expr, field_name); - if(lazy_methods && arg0.type().id()==ID_symbol) + if(needed_lazy_methods && arg0.type().id() == ID_symbol) { - lazy_methods->add_needed_class( + needed_lazy_methods->add_needed_class( to_symbol_type(arg0.type()).get_identifier()); } @@ -2844,13 +2844,103 @@ codet java_bytecode_convert_methodt::convert_instructions( return code; } +/// This uses a cut-down version of the logic in +/// java_bytecode_convert_methodt::convert to initialize symbols for the +/// parameters and update the parameters in the type of method_symbol with +/// names read from the local_variable_table read from the bytecode +/// \remarks This is useful for pre-initialization for methods generated by +/// the string solver +/// \param method_symbol: The symbol for the method for which to initialize the +/// parameters +/// \param local_variable_table: The local variable table containing the +/// bytecode for the parameters +/// \param symbol_table: The symbol table into which to insert symbols for the +/// parameters +void java_bytecode_initialize_parameter_names( + symbolt &method_symbol, + const java_bytecode_parse_treet::methodt::local_variable_tablet + &local_variable_table, + symbol_table_baset &symbol_table) +{ + // Obtain a std::vector of code_typet::parametert objects from the + // (function) type of the symbol + code_typet &code_type = to_code_type(method_symbol.type); + code_typet::parameterst ¶meters = code_type.parameters(); + + // Find number of parameters + unsigned slots_for_parameters = java_method_parameter_slots(code_type); + + // Find parameter names in the local variable table: + typedef std::pair base_name_and_identifiert; + std::map param_names; + for(const auto &v : local_variable_table) + { + if(v.index < slots_for_parameters) + param_names.emplace( + v.index, + make_pair( + v.name, id2string(method_symbol.name) + "::" + id2string(v.name))); + } + + // Assign names to parameters + std::size_t param_index = 0; + for(auto ¶m : parameters) + { + irep_idt base_name, identifier; + + // Construct a sensible base name (base_name) and a fully qualified name + // (identifier) for every parameter of the method under translation, + // regardless of whether we have an LVT or not; and assign it to the + // parameter object (which is stored in the type of the symbol, not in the + // symbol table) + if(param_index == 0 && param.get_this()) + { + // my.package.ClassName.myMethodName:(II)I::this + base_name = "this"; + identifier = id2string(method_symbol.name) + "::" + id2string(base_name); + } + else + { + auto param_name = param_names.find(param_index); + if(param_name != param_names.end()) + { + base_name = param_name->second.first; + identifier = param_name->second.second; + } + else + { + // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local + // variable slot where the parameter is stored and T is a character + // indicating the type + const typet &type = param.type(); + char suffix = java_char_from_type(type); + base_name = "arg" + std::to_string(param_index) + suffix; + identifier = + id2string(method_symbol.name) + "::" + id2string(base_name); + } + } + param.set_base_name(base_name); + param.set_identifier(identifier); + + // Build a new symbol for the parameter and add it to the symbol table + parameter_symbolt parameter_symbol; + parameter_symbol.base_name = base_name; + parameter_symbol.mode = ID_java; + parameter_symbol.name = identifier; + parameter_symbol.type = param.type(); + symbol_table.insert(parameter_symbol); + + param_index += java_local_variable_slots(param.type()); + } +} + void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, - symbol_tablet &symbol_table, + symbol_table_baset &symbol_table, message_handlert &message_handler, size_t max_array_length, - safe_pointer lazy_methods, + optionalt needed_lazy_methods, java_string_library_preprocesst &string_preprocess) { static const std::unordered_set methods_to_ignore @@ -2881,7 +2971,7 @@ void java_bytecode_convert_method( symbol_table, message_handler, max_array_length, - lazy_methods, + needed_lazy_methods, string_preprocess); java_bytecode_convert_method(class_symbol, method); diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 6fd97a4b2d3..b27fd1cf898 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -14,7 +14,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include -#include #include "java_string_library_preprocess.h" #include "java_bytecode_parse_tree.h" @@ -22,33 +21,21 @@ Author: Daniel Kroening, kroening@kroening.com class class_hierarchyt; +void java_bytecode_initialize_parameter_names( + symbolt &method_symbol, + const java_bytecode_parse_treet::methodt::local_variable_tablet + &local_variable_table, + symbol_table_baset &symbol_table); + void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &, - symbol_tablet &symbol_table, + symbol_table_baset &symbol_table, message_handlert &message_handler, size_t max_array_length, - safe_pointer lazy_methods, + optionalt needed_lazy_methods, java_string_library_preprocesst &string_preprocess); -inline void java_bytecode_convert_method( - const symbolt &class_symbol, - const java_bytecode_parse_treet::methodt &method, - symbol_tablet &symbol_table, - message_handlert &message_handler, - size_t max_array_length, - java_string_library_preprocesst &string_preprocess) -{ - java_bytecode_convert_method( - class_symbol, - method, - symbol_table, - message_handler, - max_array_length, - safe_pointer::create_null(), - string_preprocess); -} - void java_bytecode_convert_method_lazy( const symbolt &class_symbol, const irep_idt &method_identifier, diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index dfd71f322d6..0d1c34d82db 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -16,7 +16,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include -#include #include #include "java_bytecode_parse_tree.h" #include "java_bytecode_convert_class.h" @@ -32,18 +31,18 @@ class java_bytecode_convert_methodt:public messaget { public: java_bytecode_convert_methodt( - symbol_tablet &_symbol_table, + symbol_table_baset &symbol_table, message_handlert &_message_handler, size_t _max_array_length, - safe_pointer _lazy_methods, - java_string_library_preprocesst &_string_preprocess): - messaget(_message_handler), - symbol_table(_symbol_table), - max_array_length(_max_array_length), - lazy_methods(_lazy_methods), - string_preprocess(_string_preprocess), - slots_for_parameters(0), - method_has_this(false) + optionalt needed_lazy_methods, + java_string_library_preprocesst &_string_preprocess) + : messaget(_message_handler), + symbol_table(symbol_table), + max_array_length(_max_array_length), + needed_lazy_methods(std::move(needed_lazy_methods)), + string_preprocess(_string_preprocess), + slots_for_parameters(0), + method_has_this(false) { } @@ -59,9 +58,9 @@ class java_bytecode_convert_methodt:public messaget } protected: - symbol_tablet &symbol_table; + symbol_table_baset &symbol_table; const size_t max_array_length; - safe_pointer lazy_methods; + optionalt needed_lazy_methods; /// Fully qualified name of the method under translation. /// Initialized by `convert`. diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index c949b54f211..21d9d4ce123 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -14,6 +14,8 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include +#include #include #include #include @@ -205,14 +207,15 @@ bool java_bytecode_languaget::typecheck( if(c_it->second.parsed_class.name.empty()) continue; - if(java_bytecode_convert_class( - c_it->second, - symbol_table, - get_message_handler(), - max_user_array_length, - lazy_methods, - lazy_methods_mode, - string_preprocess)) + if( + java_bytecode_convert_class( + c_it->second, + symbol_table, + get_message_handler(), + max_user_array_length, + method_bytecode, + lazy_methods_mode, + string_preprocess)) return true; } @@ -241,21 +244,23 @@ bool java_bytecode_languaget::typecheck( if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) { // ci: context-insensitive. - if(do_ci_lazy_method_conversion(symbol_table, lazy_methods)) + if(do_ci_lazy_method_conversion(symbol_table, method_bytecode)) return true; } else if(lazy_methods_mode==LAZY_METHODS_MODE_EAGER) { - // Simply elaborate all methods symbols now. - for(const auto &method_sig : lazy_methods) + journalling_symbol_tablet journalling_symbol_table = + journalling_symbol_tablet::wrap(symbol_table); + // Convert all methods for which we have bytecode now + for(const auto &method_sig : method_bytecode) { - java_bytecode_convert_method( - *method_sig.second.first, - *method_sig.second.second, - symbol_table, - get_message_handler(), - max_user_array_length, - string_preprocess); + convert_single_method(method_sig.first, journalling_symbol_table); + } + // Now convert all newly added string methods + for(const auto &fn_name : journalling_symbol_table.get_inserted()) + { + if(string_preprocess.implements_function(fn_name)) + convert_single_method(fn_name, symbol_table); } } // Otherwise our caller is in charge of elaborating methods on demand. @@ -300,29 +305,22 @@ bool java_bytecode_languaget::generate_support_functions( /// instantiated (or evidence that an object of that type exists before the main /// function is entered, such as being passed as a parameter). /// \par parameters: `symbol_table`: global symbol table -/// `lazy_methods`: map from method names to relevant symbol and parsed-method -/// objects. +/// `method_bytecode`: map from method names to relevant symbol and +/// parsed-method objects. /// \return Elaborates lazily-converted methods that may be reachable starting /// from the main entry point (usually provided with the --function command- /// line option) (side-effect on the symbol_table). Returns false on success. bool java_bytecode_languaget::do_ci_lazy_method_conversion( symbol_tablet &symbol_table, - lazy_methodst &lazy_methods) + method_bytecodet &method_bytecode) { - const auto method_converter=[&]( - const symbolt &symbol, - const java_bytecode_parse_treet::methodt &method, - ci_lazy_methods_neededt new_lazy_methods) - { - java_bytecode_convert_method( - symbol, - method, - symbol_table, - get_message_handler(), - max_user_array_length, - safe_pointer::create_non_null(&new_lazy_methods), - string_preprocess); - }; + const method_convertert method_converter = + [this, &symbol_table] + (const irep_idt &function_id, ci_lazy_methods_neededt lazy_methods_needed) + { + return convert_single_method( + function_id, symbol_table, std::move(lazy_methods_needed)); + }; ci_lazy_methodst method_gather( symbol_table, @@ -334,7 +332,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( get_pointer_type_selector(), get_message_handler()); - return method_gather(symbol_table, lazy_methods, method_converter); + return method_gather(symbol_table, method_bytecode, method_converter); } const select_pointer_typet & @@ -349,78 +347,116 @@ const select_pointer_typet & /// \return Populates `methods` with the complete list of lazy methods that are /// available to convert (those which are valid parameters for /// `convert_lazy_method`) -void java_bytecode_languaget::lazy_methods_provided( - std::set &methods) const +void java_bytecode_languaget::methods_provided(id_sett &methods) const { - for(const auto &kv : lazy_methods) + // Add all string solver methods to map + string_preprocess.get_all_function_names(methods); + // Add all concrete methods to map + for(const auto &kv : method_bytecode) methods.insert(kv.first); } -/// Promote a lazy-converted method (one whose type is known but whose body -/// hasn't been converted) into a fully- elaborated one. -/// \par parameters: `id`: method ID to convert -/// `symtab`: global symbol table -/// \return Amends the symbol table entry for function `id`, which should be a -/// lazy method provided by this instance of `java_bytecode_languaget`. It -/// should initially have a nil value. After this method completes, it will -/// have a value representing the method body, identical to that produced -/// using eager method conversion. +/// \brief Promote a lazy-converted method (one whose type is known but whose +/// body hasn't been converted) into a fully-elaborated one. +/// \remarks Amends the symbol table entry for function `function_id`, which +/// should be a method provided by this instance of `java_bytecode_languaget` +/// to have a value representing the method body identical to that produced +/// using eager method conversion. +/// \param function_id: method ID to convert +/// \param symbol_table: global symbol table void java_bytecode_languaget::convert_lazy_method( - const irep_idt &id, - symbol_tablet &symtab) + const irep_idt &function_id, + symbol_tablet &symbol_table) { - const auto &lazy_method_entry=lazy_methods.at(id); - java_bytecode_convert_method( - *lazy_method_entry.first, - *lazy_method_entry.second, - symtab, - get_message_handler(), - max_user_array_length, - string_preprocess); + convert_single_method(function_id, symbol_table); } -/// Replace methods of the String library that are in the symbol table by code -/// generated by string_preprocess. -/// \param context: a symbol table -void java_bytecode_languaget::replace_string_methods( - symbol_table_baset &context) +/// \brief Convert a method (one whose type is known but whose body hasn't +/// been converted) but don't run typecheck, etc +/// \remarks Amends the symbol table entry for function `function_id`, which +/// should be a method provided by this instance of `java_bytecode_languaget` +/// to have a value representing the method body. +/// \param function_id: method ID to convert +/// \param symbol_table: global symbol table +/// \param needed_lazy_methods: optionally a collection of needed methods to +/// update with any methods touched during the conversion +bool java_bytecode_languaget::convert_single_method( + const irep_idt &function_id, + symbol_table_baset &symbol_table, + optionalt needed_lazy_methods) { - // Symbols that have code type are potentialy to be replaced - std::list code_symbols; - forall_symbols(symbol, context.symbols) - { - if(symbol->second.type.id()==ID_code) - code_symbols.push_back(symbol->second); - } + const symbolt &symbol = symbol_table.lookup_ref(function_id); - for(const auto &symbol : code_symbols) + // Nothing to do if body is already loaded + if(symbol.value.is_not_nil()) + return false; + + // Get bytecode for specified function if we have it + method_bytecodet::opt_reft cmb = method_bytecode.get(function_id); + + // Check if have a string solver implementation + if(string_preprocess.implements_function(function_id)) { - const irep_idt &id=symbol.name; - exprt generated_code=string_preprocess.code_for_function( - id, to_code_type(symbol.type), symbol.location, context); - if(generated_code.is_not_nil()) + symbolt &symbol = symbol_table.get_writeable_ref(function_id); + // Load parameter names from any extant bytecode before filling in body + if(cmb) { - // Replace body of the function by code generated by string preprocess - symbolt &symbol=*context.get_writeable(id); - symbol.value=generated_code; - // Specifically instrument the new code, since this comes after the - // blanket instrumentation pass called before typechecking. - java_bytecode_instrument_symbol( - context, - symbol, - throw_runtime_exceptions, - get_message_handler()); + java_bytecode_initialize_parameter_names( + symbol, cmb->get().method.local_variable_table, symbol_table); } + // Populate body of the function with code generated by string preprocess + exprt generated_code = + string_preprocess.code_for_function(symbol, symbol_table); + INVARIANT( + generated_code.is_not_nil(), "Couldn't retrieve code for string method"); + // String solver can make calls to functions that haven't yet been seen. + // Add these to the needed_lazy_methods collection + if(needed_lazy_methods) + { + for(const_depth_iteratort it = generated_code.depth_cbegin(); + it != generated_code.depth_cend(); + ++it) + { + if(it->id() == ID_code) + { + const auto fn_call = expr_try_dynamic_cast(*it); + if(!fn_call) + continue; + // Only support non-virtual function calls for now, if string solver + // starts to introduce virtual function calls then we will need to + // duplicate the behavior of java_bytecode_convert_method where it + // handles the invokevirtual instruction + const symbol_exprt &fn_sym = + expr_dynamic_cast(fn_call->function()); + needed_lazy_methods->add_needed_method(fn_sym.get_identifier()); + } + } + } + symbol.value = generated_code; + return false; } + + // No string solver implementation, check if have bytecode for it + if(cmb) + { + java_bytecode_convert_method( + symbol_table.lookup_ref(cmb->get().class_id), + cmb->get().method, + symbol_table, + get_message_handler(), + max_user_array_length, + std::move(needed_lazy_methods), + string_preprocess); + return false; + } + + return true; } bool java_bytecode_languaget::final(symbol_table_baset &symbol_table) { PRECONDITION(language_options_initialized); - // replace code of String methods calls by code we generate - replace_string_methods(symbol_table); - return false; } diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index d8999572942..905c56afb6e 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -16,6 +16,8 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include "ci_lazy_methods.h" +#include "ci_lazy_methods_needed.h" #include "java_class_loader.h" #include "java_string_library_preprocess.h" @@ -81,13 +83,6 @@ struct object_factory_parameterst final bool string_printable = false; }; -typedef std::pair< - const symbolt *, - const java_bytecode_parse_treet::methodt *> - lazy_method_valuet; -typedef std::map - lazy_methodst; - class java_bytecode_languaget:public languaget { public: @@ -109,8 +104,6 @@ class java_bytecode_languaget:public languaget symbol_tablet &context, const std::string &module) override; - void replace_string_methods(symbol_table_baset &context); - virtual bool final(symbol_table_baset &context) override; void show_parse(std::ostream &out) override; @@ -156,12 +149,25 @@ class java_bytecode_languaget:public languaget std::set extensions() const override; void modules_provided(std::set &modules) override; - virtual void lazy_methods_provided(std::set &) const override; + virtual void methods_provided(id_sett &methods) const override; virtual void convert_lazy_method( - const irep_idt &id, symbol_tablet &) override; + const irep_idt &function_id, + symbol_tablet &symbol_table) override; protected: - bool do_ci_lazy_method_conversion(symbol_tablet &, lazy_methodst &); + void convert_single_method( + const irep_idt &function_id, + symbol_table_baset &symbol_table) + { + convert_single_method( + function_id, symbol_table, optionalt()); + } + bool convert_single_method( + const irep_idt &function_id, + symbol_table_baset &symbol_table, + optionalt needed_lazy_methods); + + bool do_ci_lazy_method_conversion(symbol_tablet &, method_bytecodet &); const select_pointer_typet &get_pointer_type_selector() const; irep_idt main_class; @@ -170,7 +176,7 @@ class java_bytecode_languaget:public languaget bool assume_inputs_non_null; // assume inputs variables to be non-null object_factory_parameterst object_factory_parameters; size_t max_user_array_length; // max size for user code created arrays - lazy_methodst lazy_methods; + method_bytecodet method_bytecode; lazy_methods_modet lazy_methods_mode; std::vector lazy_methods_extra_entry_points; bool string_refinement_enabled; diff --git a/src/java_bytecode/java_string_library_preprocess.cpp b/src/java_bytecode/java_string_library_preprocess.cpp index 1da4340e163..6cee9ea59b8 100644 --- a/src/java_bytecode/java_string_library_preprocess.cpp +++ b/src/java_bytecode/java_string_library_preprocess.cpp @@ -1758,6 +1758,39 @@ codet java_string_library_preprocesst::make_string_length_code( return code_returnt(get_length(deref, symbol_table)); } +bool java_string_library_preprocesst::implements_function( + const irep_idt &function_id) const +{ + for(const id_mapt *map : id_maps) + if(map->count(function_id) != 0) + return true; + + return conversion_table.count(function_id) != 0; +} + +template +void add_keys_to_container(const TMap &map, TContainer &container) +{ + static_assert( + std::is_same::value, + "TContainer value_type doesn't match TMap key_type"); + std::transform( + map.begin(), + map.end(), + std::inserter(container, container.begin()), + [](const typename TMap::value_type &pair) { return pair.first; }); +} + +void java_string_library_preprocesst::get_all_function_names( + id_sett &methods) const +{ + for(const id_mapt *map : id_maps) + add_keys_to_container(*map, methods); + + add_keys_to_container(conversion_table, methods); +} + /// Should be called to provide code for string functions that are used in the /// code but for which no implementation is provided. /// \param function_id: name of the function @@ -1767,11 +1800,12 @@ codet java_string_library_preprocesst::make_string_length_code( /// \return Code for the body of the String functions if they are part of the /// supported String functions, nil_exprt otherwise. exprt java_string_library_preprocesst::code_for_function( - const irep_idt &function_id, - const code_typet &type, - const source_locationt &loc, + const symbolt &symbol, symbol_table_baset &symbol_table) { + const irep_idt &function_id = symbol.name; + const code_typet &type = to_code_type(symbol.type); + const source_locationt &loc = symbol.location; auto it_id=cprover_equivalent_to_java_function.find(function_id); if(it_id!=cprover_equivalent_to_java_function.end()) return make_function_from_call(it_id->second, type, loc, symbol_table); diff --git a/src/java_bytecode/java_string_library_preprocess.h b/src/java_bytecode/java_string_library_preprocess.h index ef9c7a5e3cf..59b05304f28 100644 --- a/src/java_bytecode/java_string_library_preprocess.h +++ b/src/java_bytecode/java_string_library_preprocess.h @@ -22,6 +22,7 @@ Date: March 2017 #include // should get rid of this +#include #include #include #include "character_refine_preprocess.h" @@ -30,6 +31,8 @@ Date: March 2017 // Arbitrary limit of 10 arguments for the number of arguments to String.format #define MAX_FORMAT_ARGS 10 +typedef std::unordered_set id_sett; + class java_string_library_preprocesst:public messaget { public: @@ -43,11 +46,11 @@ class java_string_library_preprocesst:public messaget void initialize_conversion_table(); void initialize_refined_string_type(); - exprt code_for_function( - const irep_idt &function_id, - const code_typet &type, - const source_locationt &loc, - symbol_table_baset &symbol_table); + bool implements_function(const irep_idt &function_id) const; + void get_all_function_names(id_sett &methods) const; + + exprt + code_for_function(const symbolt &symbol, symbol_table_baset &symbol_table); codet replace_character_call(code_function_callt call) { @@ -125,6 +128,17 @@ class java_string_library_preprocesst:public messaget // of returning it id_mapt cprover_equivalent_to_java_assign_function; + const std::array id_maps = std::array + { + { + &cprover_equivalent_to_java_function, + &cprover_equivalent_to_java_string_returning_function, + &cprover_equivalent_to_java_constructor, + &cprover_equivalent_to_java_assign_and_return_function, + &cprover_equivalent_to_java_assign_function, + } + }; + // A set tells us what java types should be considered as string objects std::unordered_set string_types; diff --git a/src/util/language.h b/src/util/language.h index 817a522b9c7..f1ee81f84a9 100644 --- a/src/util/language.h +++ b/src/util/language.h @@ -12,7 +12,7 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_UTIL_LANGUAGE_H #define CPROVER_UTIL_LANGUAGE_H -#include +#include #include #include #include // unique_ptr @@ -22,6 +22,8 @@ Author: Daniel Kroening, kroening@kroening.com #include "message.h" +typedef std::unordered_set id_sett; + class symbol_tablet; class symbol_table_baset; class exprt; @@ -76,11 +78,12 @@ class languaget:public messaget // add lazy functions provided to set - virtual void lazy_methods_provided(std::set &methods) const + virtual void methods_provided(id_sett &methods) const { } // populate a lazy method - virtual void convert_lazy_method(const irep_idt &id, symbol_tablet &) + virtual void + convert_lazy_method(const irep_idt &function_id, symbol_tablet &symbol_table) { } /// Final adjustments, e.g. initializing stub functions and globals that diff --git a/src/util/language_file.cpp b/src/util/language_file.cpp index 5eb6b2c654a..f42e7f88159 100644 --- a/src/util/language_file.cpp +++ b/src/util/language_file.cpp @@ -141,8 +141,8 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) // register lazy methods. // TODO: learn about modules and generalise this // to module-providing languages if required. - std::set lazy_method_ids; - file.second.language->lazy_methods_provided(lazy_method_ids); + id_sett lazy_method_ids; + file.second.language->methods_provided(lazy_method_ids); for(const auto &id : lazy_method_ids) lazy_method_map[id]=&file.second; } diff --git a/src/util/safe_pointer.h b/src/util/safe_pointer.h deleted file mode 100644 index 152eb3242d9..00000000000 --- a/src/util/safe_pointer.h +++ /dev/null @@ -1,67 +0,0 @@ -/*******************************************************************\ - -Module: Simple checked pointers - -Author: Chris Smowton, chris@smowton.net - -\*******************************************************************/ - -/// \file -/// Simple checked pointers - -#ifndef CPROVER_UTIL_SAFE_POINTER_H -#define CPROVER_UTIL_SAFE_POINTER_H - -template class safe_pointer -{ - public: - operator bool() const - { - return !!ptr; - } - - T *get() const - { - assert(ptr && "dereferenced a null safe pointer"); - return ptr; - } - - T &operator*() const - { - return *get(); - } - - T *operator->() const - { - return get(); - } - - static safe_pointer create_null() - { - return safe_pointer(); - } - - static safe_pointer create_non_null( - T *target) - { - assert(target && "initialized safe pointer with null"); - return safe_pointer(target); - } - - static safe_pointer create_maybe_null( - T *target) - { - return safe_pointer(target); - } - - protected: - T *ptr; - - explicit safe_pointer(T *target) : ptr(target) - {} - - safe_pointer() : ptr(nullptr) - {} -}; - -#endif