forked from diffblue/cbmc
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move string literal initialisation into separate file
- Loading branch information
Showing
4 changed files
with
226 additions
and
163 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,8 +11,6 @@ Author: Daniel Kroening, [email protected] | |
|
||
#include "java_bytecode_typecheck.h" | ||
|
||
#include <iomanip> | ||
|
||
#include <util/arith_tools.h> | ||
#include <util/unicode.h> | ||
|
||
|
@@ -23,6 +21,7 @@ Author: Daniel Kroening, [email protected] | |
#include "java_utils.h" | ||
#include "java_root_class.h" | ||
#include "java_string_library_preprocess.h" | ||
#include "java_string_literals.h" | ||
|
||
void java_bytecode_typecheckt::typecheck_expr(exprt &expr) | ||
{ | ||
|
@@ -71,169 +70,11 @@ void java_bytecode_typecheckt::typecheck_expr_java_new_array( | |
typecheck_type(type); | ||
} | ||
|
||
static std::string escape_non_alnum(const std::string &toescape) | ||
{ | ||
std::ostringstream escaped; | ||
for(auto &ch : toescape) | ||
{ | ||
if(ch=='_') | ||
escaped << "__"; | ||
else if(isalnum(ch)) | ||
escaped << ch; | ||
else | ||
escaped << '_' | ||
<< std::hex | ||
<< std::setfill('0') | ||
<< std::setw(2) | ||
<< (unsigned int)ch; | ||
} | ||
return escaped.str(); | ||
} | ||
|
||
/// Convert UCS-2 or UTF-16 to an array expression. | ||
/// \par parameters: `in`: wide string to convert | ||
/// \return Returns a Java char array containing the same wchars. | ||
static array_exprt utf16_to_array(const std::wstring &in) | ||
{ | ||
const auto jchar=java_char_type(); | ||
array_exprt ret( | ||
array_typet(jchar, from_integer(in.length(), java_int_type()))); | ||
for(const auto c : in) | ||
ret.copy_to_operands(from_integer(c, jchar)); | ||
return ret; | ||
} | ||
|
||
void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) | ||
{ | ||
const irep_idt value=expr.get(ID_value); | ||
const symbol_typet string_type("java::java.lang.String"); | ||
|
||
const std::string escaped_symbol_name = escape_non_alnum(id2string(value)); | ||
const std::string escaped_symbol_name_with_prefix = | ||
JAVA_STRING_LITERAL_PREFIX "." + escaped_symbol_name; | ||
|
||
auto findit = symbol_table.symbols.find(escaped_symbol_name_with_prefix); | ||
if(findit!=symbol_table.symbols.end()) | ||
{ | ||
expr=address_of_exprt(findit->second.symbol_expr()); | ||
return; | ||
} | ||
|
||
// Create a new symbol: | ||
symbolt new_symbol; | ||
new_symbol.name = escaped_symbol_name_with_prefix; | ||
new_symbol.type=string_type; | ||
new_symbol.base_name = escaped_symbol_name; | ||
new_symbol.pretty_name=value; | ||
new_symbol.mode=ID_java; | ||
new_symbol.is_type=false; | ||
new_symbol.is_lvalue=true; | ||
new_symbol.is_static_lifetime=true; // These are basically const global data. | ||
|
||
// Regardless of string refinement setting, at least initialize | ||
// the literal with @clsid = String and @lock = false: | ||
symbol_typet jlo_symbol("java::java.lang.Object"); | ||
const auto &jlo_struct=to_struct_type(ns.follow(jlo_symbol)); | ||
struct_exprt jlo_init(jlo_symbol); | ||
const auto &jls_struct=to_struct_type(ns.follow(string_type)); | ||
java_root_class_init(jlo_init, jlo_struct, false, "java::java.lang.String"); | ||
|
||
// If string refinement *is* around, populate the actual | ||
// contents as well: | ||
if(string_refinement_enabled) | ||
{ | ||
struct_exprt literal_init(new_symbol.type); | ||
literal_init.operands().resize(jls_struct.components().size()); | ||
const std::size_t jlo_nb = jls_struct.component_number("@java.lang.Object"); | ||
literal_init.operands()[jlo_nb] = jlo_init; | ||
|
||
const std::size_t length_nb = jls_struct.component_number("length"); | ||
const typet &length_type = jls_struct.components()[length_nb].type(); | ||
const exprt length = from_integer(id2string(value).size(), length_type); | ||
literal_init.operands()[length_nb] = length; | ||
|
||
// Initialize the string with a constant utf-16 array: | ||
symbolt array_symbol; | ||
array_symbol.name = escaped_symbol_name_with_prefix + "_constarray"; | ||
array_symbol.base_name = escaped_symbol_name + "_constarray"; | ||
array_symbol.pretty_name=value; | ||
array_symbol.mode=ID_java; | ||
array_symbol.is_type=false; | ||
array_symbol.is_lvalue=true; | ||
// These are basically const global data: | ||
array_symbol.is_static_lifetime=true; | ||
array_symbol.is_state_var=true; | ||
array_symbol.value = | ||
utf16_to_array(utf8_to_utf16_little_endian(id2string(value))); | ||
array_symbol.type = array_symbol.value.type(); | ||
|
||
if(symbol_table.add(array_symbol)) | ||
throw "failed to add constarray symbol to symbol table"; | ||
|
||
const symbol_exprt array_expr = array_symbol.symbol_expr(); | ||
const address_of_exprt array_pointer( | ||
index_exprt(array_expr, from_integer(0, java_int_type()))); | ||
|
||
const std::size_t data_nb = jls_struct.component_number("data"); | ||
literal_init.operands()[data_nb] = array_pointer; | ||
|
||
// Associate array with pointer | ||
symbolt return_symbol; | ||
return_symbol.name = escaped_symbol_name_with_prefix + "_return_value"; | ||
return_symbol.base_name = escaped_symbol_name + "_return_value"; | ||
return_symbol.pretty_name = | ||
escaped_symbol_name.length() > 10 | ||
? escaped_symbol_name.substr(0, 10) + "..._return_value" | ||
: escaped_symbol_name + "_return_value"; | ||
return_symbol.mode = ID_java; | ||
return_symbol.is_type = false; | ||
return_symbol.is_lvalue = true; | ||
return_symbol.is_static_lifetime = true; | ||
return_symbol.is_state_var = true; | ||
return_symbol.value = make_function_application( | ||
ID_cprover_associate_array_to_pointer_func, | ||
{array_symbol.value, array_pointer}, | ||
java_int_type(), | ||
symbol_table); | ||
return_symbol.type = return_symbol.value.type(); | ||
if(symbol_table.add(return_symbol)) | ||
throw "failed to add return symbol to symbol table"; | ||
new_symbol.value=literal_init; | ||
} | ||
else if(jls_struct.components().size()>=1 && | ||
jls_struct.components()[0].get_name()=="@java.lang.Object") | ||
{ | ||
// Case where something defined java.lang.String, so it has | ||
// a proper base class (always java.lang.Object in practical | ||
// JDKs seen so far) | ||
struct_exprt literal_init(new_symbol.type); | ||
literal_init.move_to_operands(jlo_init); | ||
for(const auto &comp : jls_struct.components()) | ||
{ | ||
if(comp.get_name()=="@java.lang.Object") | ||
continue; | ||
// Other members of JDK's java.lang.String we don't understand | ||
// without string-refinement. Just zero-init them; consider using | ||
// test-gen-like nondet object trees instead. | ||
literal_init.copy_to_operands( | ||
zero_initializer(comp.type(), expr.source_location(), ns)); | ||
} | ||
new_symbol.value=literal_init; | ||
} | ||
else if(jls_struct.get_bool(ID_incomplete_class)) | ||
{ | ||
// Case where java.lang.String was stubbed, and so directly defines | ||
// @class_identifier and @lock: | ||
new_symbol.value=jlo_init; | ||
} | ||
|
||
if(symbol_table.add(new_symbol)) | ||
{ | ||
error() << "failed to add string literal symbol to symbol table" << eom; | ||
throw 0; | ||
} | ||
|
||
expr=address_of_exprt(new_symbol.symbol_expr()); | ||
expr = address_of_exprt( | ||
get_or_create_string_literal_symbol( | ||
expr, symbol_table, string_refinement_enabled)); | ||
} | ||
|
||
void java_bytecode_typecheckt::typecheck_expr_symbol(symbol_exprt &expr) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
/*******************************************************************\ | ||
Module: Java string literal processing | ||
Author: Chris Smowton, [email protected] | ||
\*******************************************************************/ | ||
|
||
#include "java_string_literals.h" | ||
#include "java_root_class.h" | ||
#include "java_types.h" | ||
#include "java_utils.h" | ||
|
||
#include <linking/zero_initializer.h> | ||
|
||
#include <util/arith_tools.h> | ||
#include <util/namespace.h> | ||
#include <util/unicode.h> | ||
|
||
#include <iomanip> | ||
#include <sstream> | ||
|
||
/// Replace non-alphanumeric characters with `_xx` escapes, where xx are hex | ||
/// digits. Underscores are replaced by `__`. | ||
/// \param to_escape: string to escape | ||
/// \return string with non-alphanumeric characters escaped | ||
static std::string escape_non_alnum(const std::string &to_escape) | ||
{ | ||
std::ostringstream escaped; | ||
for(auto &ch : to_escape) | ||
{ | ||
if(ch=='_') | ||
escaped << "__"; | ||
else if(isalnum(ch)) | ||
escaped << ch; | ||
else | ||
escaped << '_' | ||
<< std::hex | ||
<< std::setfill('0') | ||
<< std::setw(2) | ||
<< (unsigned int)ch; | ||
} | ||
return escaped.str(); | ||
} | ||
|
||
/// Convert UCS-2 or UTF-16 to an array expression. | ||
/// \par parameters: `in`: wide string to convert | ||
/// \return Returns a Java char array containing the same wchars. | ||
static array_exprt utf16_to_array(const std::wstring &in) | ||
{ | ||
const auto jchar=java_char_type(); | ||
array_exprt ret( | ||
array_typet(jchar, from_integer(in.length(), java_int_type()))); | ||
for(const auto c : in) | ||
ret.copy_to_operands(from_integer(c, jchar)); | ||
return ret; | ||
} | ||
|
||
/// Creates or gets an existing constant global symbol for a given string | ||
/// literal. | ||
/// \param string_expr: string literal expression to convert | ||
/// \param symbol_table: global symbol table. If not already present, constant | ||
/// global symbols will be added. | ||
/// \param string_refinement_enabled: if true, string refinement's string data | ||
/// structure will also be initialised and added to the symbol table. | ||
/// \return a symbol_expr corresponding to the new or existing literal symbol. | ||
symbol_exprt get_or_create_string_literal_symbol( | ||
const exprt &string_expr, | ||
symbol_table_baset &symbol_table, | ||
bool string_refinement_enabled) | ||
{ | ||
PRECONDITION(string_expr.id() == ID_java_string_literal); | ||
const irep_idt value = string_expr.get(ID_value); | ||
const symbol_typet string_type("java::java.lang.String"); | ||
|
||
const std::string escaped_symbol_name = escape_non_alnum(id2string(value)); | ||
const std::string escaped_symbol_name_with_prefix = | ||
JAVA_STRING_LITERAL_PREFIX "." + escaped_symbol_name; | ||
|
||
auto findit = symbol_table.symbols.find(escaped_symbol_name_with_prefix); | ||
if(findit != symbol_table.symbols.end()) | ||
return findit->second.symbol_expr(); | ||
|
||
// Create a new symbol: | ||
symbolt new_symbol; | ||
new_symbol.name = escaped_symbol_name_with_prefix; | ||
new_symbol.type = string_type; | ||
new_symbol.base_name = escaped_symbol_name; | ||
new_symbol.pretty_name = value; | ||
new_symbol.mode = ID_java; | ||
new_symbol.is_type = false; | ||
new_symbol.is_lvalue = true; | ||
new_symbol.is_static_lifetime = true; | ||
|
||
namespacet ns(symbol_table); | ||
|
||
// Regardless of string refinement setting, at least initialize | ||
// the literal with @clsid = String and @lock = false: | ||
symbol_typet jlo_symbol("java::java.lang.Object"); | ||
const auto &jlo_struct = to_struct_type(ns.follow(jlo_symbol)); | ||
struct_exprt jlo_init(jlo_symbol); | ||
const auto &jls_struct = to_struct_type(ns.follow(string_type)); | ||
java_root_class_init(jlo_init, jlo_struct, false, "java::java.lang.String"); | ||
|
||
// If string refinement *is* around, populate the actual | ||
// contents as well: | ||
if(string_refinement_enabled) | ||
{ | ||
struct_exprt literal_init(new_symbol.type); | ||
literal_init.operands().resize(jls_struct.components().size()); | ||
const std::size_t jlo_nb = jls_struct.component_number("@java.lang.Object"); | ||
literal_init.operands()[jlo_nb] = jlo_init; | ||
|
||
const std::size_t length_nb = jls_struct.component_number("length"); | ||
const typet &length_type = jls_struct.components()[length_nb].type(); | ||
const exprt length = from_integer(id2string(value).size(), length_type); | ||
literal_init.operands()[length_nb] = length; | ||
|
||
// Initialize the string with a constant utf-16 array: | ||
symbolt array_symbol; | ||
array_symbol.name = escaped_symbol_name_with_prefix + "_constarray"; | ||
array_symbol.base_name = escaped_symbol_name + "_constarray"; | ||
array_symbol.pretty_name = value; | ||
array_symbol.mode = ID_java; | ||
array_symbol.is_type = false; | ||
array_symbol.is_lvalue = true; | ||
// These are basically const global data: | ||
array_symbol.is_static_lifetime = true; | ||
array_symbol.is_state_var = true; | ||
array_symbol.value = | ||
utf16_to_array(utf8_to_utf16_little_endian(id2string(value))); | ||
array_symbol.type = array_symbol.value.type(); | ||
|
||
if(symbol_table.add(array_symbol)) | ||
throw "failed to add constarray symbol to symbol table"; | ||
|
||
const symbol_exprt array_expr = array_symbol.symbol_expr(); | ||
const address_of_exprt array_pointer( | ||
index_exprt(array_expr, from_integer(0, java_int_type()))); | ||
|
||
const std::size_t data_nb = jls_struct.component_number("data"); | ||
literal_init.operands()[data_nb] = array_pointer; | ||
|
||
// Associate array with pointer | ||
symbolt return_symbol; | ||
return_symbol.name = escaped_symbol_name_with_prefix + "_return_value"; | ||
return_symbol.base_name = escaped_symbol_name + "_return_value"; | ||
return_symbol.pretty_name = | ||
escaped_symbol_name.length() > 10 | ||
? escaped_symbol_name.substr(0, 10) + "..._return_value" | ||
: escaped_symbol_name + "_return_value"; | ||
return_symbol.mode = ID_java; | ||
return_symbol.is_type = false; | ||
return_symbol.is_lvalue = true; | ||
return_symbol.is_static_lifetime = true; | ||
return_symbol.is_state_var = true; | ||
return_symbol.value = make_function_application( | ||
ID_cprover_associate_array_to_pointer_func, | ||
{array_symbol.value, array_pointer}, | ||
java_int_type(), | ||
symbol_table); | ||
return_symbol.type = return_symbol.value.type(); | ||
if(symbol_table.add(return_symbol)) | ||
throw "failed to add return symbol to symbol table"; | ||
new_symbol.value = literal_init; | ||
} | ||
else if(jls_struct.components().size()>=1 && | ||
jls_struct.components()[0].get_name()=="@java.lang.Object") | ||
{ | ||
// Case where something defined java.lang.String, so it has | ||
// a proper base class (always java.lang.Object in practical | ||
// JDKs seen so far) | ||
struct_exprt literal_init(new_symbol.type); | ||
literal_init.move_to_operands(jlo_init); | ||
for(const auto &comp : jls_struct.components()) | ||
{ | ||
if(comp.get_name()=="@java.lang.Object") | ||
continue; | ||
// Other members of JDK's java.lang.String we don't understand | ||
// without string-refinement. Just zero-init them; consider using | ||
// test-gen-like nondet object trees instead. | ||
literal_init.copy_to_operands( | ||
zero_initializer(comp.type(), string_expr.source_location(), ns)); | ||
} | ||
new_symbol.value = literal_init; | ||
} | ||
else if(jls_struct.get_bool(ID_incomplete_class)) | ||
{ | ||
// Case where java.lang.String was stubbed, and so directly defines | ||
// @class_identifier and @lock: | ||
new_symbol.value = jlo_init; | ||
} | ||
|
||
bool add_failed = symbol_table.add(new_symbol); | ||
INVARIANT( | ||
!add_failed, | ||
"string literal symbol was already checked not to be " | ||
"in the symbol table, so adding it should succeed"); | ||
|
||
return new_symbol.symbol_expr(); | ||
} |
Oops, something went wrong.