From 515ebdd29386d34a1b84e50052e32f710a165ac2 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 30 Jan 2018 11:08:57 +0000 Subject: [PATCH 1/3] CI lazy methods: scan global initialisers for global references CI lazy methods now notices cases like global g1 having initialiser &g2, in which case having a reference to global g1 implies a reference to global g2. Currently this only happens in the string solver in Java, where a literal's initialiser references a constant array global. --- src/java_bytecode/ci_lazy_methods.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 2aa62b01904..7ebdb84221e 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -479,6 +479,8 @@ void ci_lazy_methodst::gather_needed_globals( findit->second.is_static_lifetime) { needed.add(findit->second); + // Gather any globals referenced in the initialiser: + gather_needed_globals(findit->second.value, symbol_table, needed); } } else From d95cb12c92425c8d57e70302a10e9d357a42ff92 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 29 Jan 2018 14:03:27 +0000 Subject: [PATCH 2/3] Move string literal initialisation into separate file --- src/java_bytecode/Makefile | 1 + .../java_bytecode_typecheck_expr.cpp | 167 +-------------- src/java_bytecode/java_string_literals.cpp | 201 ++++++++++++++++++ src/java_bytecode/java_string_literals.h | 20 ++ 4 files changed, 226 insertions(+), 163 deletions(-) create mode 100644 src/java_bytecode/java_string_literals.cpp create mode 100644 src/java_bytecode/java_string_literals.h diff --git a/src/java_bytecode/Makefile b/src/java_bytecode/Makefile index c0cbc6daebd..02583190acf 100644 --- a/src/java_bytecode/Makefile +++ b/src/java_bytecode/Makefile @@ -24,6 +24,7 @@ SRC = bytecode_info.cpp \ java_pointer_casts.cpp \ java_root_class.cpp \ java_string_library_preprocess.cpp \ + java_string_literals.cpp \ java_types.cpp \ java_utils.cpp \ generate_java_generic_type.cpp \ diff --git a/src/java_bytecode/java_bytecode_typecheck_expr.cpp b/src/java_bytecode/java_bytecode_typecheck_expr.cpp index 0a999a6b912..8f8864660e1 100644 --- a/src/java_bytecode/java_bytecode_typecheck_expr.cpp +++ b/src/java_bytecode/java_bytecode_typecheck_expr.cpp @@ -11,8 +11,6 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_typecheck.h" -#include - #include #include @@ -23,6 +21,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_utils.h" #include "java_root_class.h" #include "java_string_library_preprocess.h" +#include "java_string_literals.h" void java_bytecode_typecheckt::typecheck_expr(exprt &expr) { @@ -71,169 +70,11 @@ void java_bytecode_typecheckt::typecheck_expr_java_new_array( typecheck_type(type); } -static std::string escape_non_alnum(const std::string &toescape) -{ - std::ostringstream escaped; - for(auto &ch : toescape) - { - if(ch=='_') - escaped << "__"; - else if(isalnum(ch)) - escaped << ch; - else - escaped << '_' - << std::hex - << std::setfill('0') - << std::setw(2) - << (unsigned int)ch; - } - return escaped.str(); -} - -/// Convert UCS-2 or UTF-16 to an array expression. -/// \par parameters: `in`: wide string to convert -/// \return Returns a Java char array containing the same wchars. -static array_exprt utf16_to_array(const std::wstring &in) -{ - const auto jchar=java_char_type(); - array_exprt ret( - array_typet(jchar, from_integer(in.length(), java_int_type()))); - for(const auto c : in) - ret.copy_to_operands(from_integer(c, jchar)); - return ret; -} - void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) { - const irep_idt value=expr.get(ID_value); - const symbol_typet string_type("java::java.lang.String"); - - const std::string escaped_symbol_name = escape_non_alnum(id2string(value)); - const std::string escaped_symbol_name_with_prefix = - JAVA_STRING_LITERAL_PREFIX "." + escaped_symbol_name; - - auto findit = symbol_table.symbols.find(escaped_symbol_name_with_prefix); - if(findit!=symbol_table.symbols.end()) - { - expr=address_of_exprt(findit->second.symbol_expr()); - return; - } - - // Create a new symbol: - symbolt new_symbol; - new_symbol.name = escaped_symbol_name_with_prefix; - new_symbol.type=string_type; - new_symbol.base_name = escaped_symbol_name; - new_symbol.pretty_name=value; - new_symbol.mode=ID_java; - new_symbol.is_type=false; - new_symbol.is_lvalue=true; - new_symbol.is_static_lifetime=true; // These are basically const global data. - - // Regardless of string refinement setting, at least initialize - // the literal with @clsid = String and @lock = false: - symbol_typet jlo_symbol("java::java.lang.Object"); - const auto &jlo_struct=to_struct_type(ns.follow(jlo_symbol)); - struct_exprt jlo_init(jlo_symbol); - const auto &jls_struct=to_struct_type(ns.follow(string_type)); - java_root_class_init(jlo_init, jlo_struct, false, "java::java.lang.String"); - - // If string refinement *is* around, populate the actual - // contents as well: - if(string_refinement_enabled) - { - struct_exprt literal_init(new_symbol.type); - literal_init.operands().resize(jls_struct.components().size()); - const std::size_t jlo_nb = jls_struct.component_number("@java.lang.Object"); - literal_init.operands()[jlo_nb] = jlo_init; - - const std::size_t length_nb = jls_struct.component_number("length"); - const typet &length_type = jls_struct.components()[length_nb].type(); - const exprt length = from_integer(id2string(value).size(), length_type); - literal_init.operands()[length_nb] = length; - - // Initialize the string with a constant utf-16 array: - symbolt array_symbol; - array_symbol.name = escaped_symbol_name_with_prefix + "_constarray"; - array_symbol.base_name = escaped_symbol_name + "_constarray"; - array_symbol.pretty_name=value; - array_symbol.mode=ID_java; - array_symbol.is_type=false; - array_symbol.is_lvalue=true; - // These are basically const global data: - array_symbol.is_static_lifetime=true; - array_symbol.is_state_var=true; - array_symbol.value = - utf16_to_array(utf8_to_utf16_little_endian(id2string(value))); - array_symbol.type = array_symbol.value.type(); - - if(symbol_table.add(array_symbol)) - throw "failed to add constarray symbol to symbol table"; - - const symbol_exprt array_expr = array_symbol.symbol_expr(); - const address_of_exprt array_pointer( - index_exprt(array_expr, from_integer(0, java_int_type()))); - - const std::size_t data_nb = jls_struct.component_number("data"); - literal_init.operands()[data_nb] = array_pointer; - - // Associate array with pointer - symbolt return_symbol; - return_symbol.name = escaped_symbol_name_with_prefix + "_return_value"; - return_symbol.base_name = escaped_symbol_name + "_return_value"; - return_symbol.pretty_name = - escaped_symbol_name.length() > 10 - ? escaped_symbol_name.substr(0, 10) + "..._return_value" - : escaped_symbol_name + "_return_value"; - return_symbol.mode = ID_java; - return_symbol.is_type = false; - return_symbol.is_lvalue = true; - return_symbol.is_static_lifetime = true; - return_symbol.is_state_var = true; - return_symbol.value = make_function_application( - ID_cprover_associate_array_to_pointer_func, - {array_symbol.value, array_pointer}, - java_int_type(), - symbol_table); - return_symbol.type = return_symbol.value.type(); - if(symbol_table.add(return_symbol)) - throw "failed to add return symbol to symbol table"; - new_symbol.value=literal_init; - } - else if(jls_struct.components().size()>=1 && - jls_struct.components()[0].get_name()=="@java.lang.Object") - { - // Case where something defined java.lang.String, so it has - // a proper base class (always java.lang.Object in practical - // JDKs seen so far) - struct_exprt literal_init(new_symbol.type); - literal_init.move_to_operands(jlo_init); - for(const auto &comp : jls_struct.components()) - { - if(comp.get_name()=="@java.lang.Object") - continue; - // Other members of JDK's java.lang.String we don't understand - // without string-refinement. Just zero-init them; consider using - // test-gen-like nondet object trees instead. - literal_init.copy_to_operands( - zero_initializer(comp.type(), expr.source_location(), ns)); - } - new_symbol.value=literal_init; - } - else if(jls_struct.get_bool(ID_incomplete_class)) - { - // Case where java.lang.String was stubbed, and so directly defines - // @class_identifier and @lock: - new_symbol.value=jlo_init; - } - - if(symbol_table.add(new_symbol)) - { - error() << "failed to add string literal symbol to symbol table" << eom; - throw 0; - } - - expr=address_of_exprt(new_symbol.symbol_expr()); + expr = address_of_exprt( + get_or_create_string_literal_symbol( + expr, symbol_table, string_refinement_enabled)); } void java_bytecode_typecheckt::typecheck_expr_symbol(symbol_exprt &expr) diff --git a/src/java_bytecode/java_string_literals.cpp b/src/java_bytecode/java_string_literals.cpp new file mode 100644 index 00000000000..d5971f9e3e2 --- /dev/null +++ b/src/java_bytecode/java_string_literals.cpp @@ -0,0 +1,201 @@ +/*******************************************************************\ + +Module: Java string literal processing + +Author: Chris Smowton, chris.smowton@diffblue.com + +\*******************************************************************/ + +#include "java_string_literals.h" +#include "java_root_class.h" +#include "java_types.h" +#include "java_utils.h" + +#include + +#include +#include +#include + +#include +#include + +/// Replace non-alphanumeric characters with `_xx` escapes, where xx are hex +/// digits. Underscores are replaced by `__`. +/// \param to_escape: string to escape +/// \return string with non-alphanumeric characters escaped +static std::string escape_non_alnum(const std::string &to_escape) +{ + std::ostringstream escaped; + for(auto &ch : to_escape) + { + if(ch=='_') + escaped << "__"; + else if(isalnum(ch)) + escaped << ch; + else + escaped << '_' + << std::hex + << std::setfill('0') + << std::setw(2) + << (unsigned int)ch; + } + return escaped.str(); +} + +/// Convert UCS-2 or UTF-16 to an array expression. +/// \par parameters: `in`: wide string to convert +/// \return Returns a Java char array containing the same wchars. +static array_exprt utf16_to_array(const std::wstring &in) +{ + const auto jchar=java_char_type(); + array_exprt ret( + array_typet(jchar, from_integer(in.length(), java_int_type()))); + for(const auto c : in) + ret.copy_to_operands(from_integer(c, jchar)); + return ret; +} + +/// Creates or gets an existing constant global symbol for a given string +/// literal. +/// \param string_expr: string literal expression to convert +/// \param symbol_table: global symbol table. If not already present, constant +/// global symbols will be added. +/// \param string_refinement_enabled: if true, string refinement's string data +/// structure will also be initialised and added to the symbol table. +/// \return a symbol_expr corresponding to the new or existing literal symbol. +symbol_exprt get_or_create_string_literal_symbol( + const exprt &string_expr, + symbol_table_baset &symbol_table, + bool string_refinement_enabled) +{ + PRECONDITION(string_expr.id() == ID_java_string_literal); + const irep_idt value = string_expr.get(ID_value); + const symbol_typet string_type("java::java.lang.String"); + + const std::string escaped_symbol_name = escape_non_alnum(id2string(value)); + const std::string escaped_symbol_name_with_prefix = + JAVA_STRING_LITERAL_PREFIX "." + escaped_symbol_name; + + auto findit = symbol_table.symbols.find(escaped_symbol_name_with_prefix); + if(findit != symbol_table.symbols.end()) + return findit->second.symbol_expr(); + + // Create a new symbol: + symbolt new_symbol; + new_symbol.name = escaped_symbol_name_with_prefix; + new_symbol.type = string_type; + new_symbol.base_name = escaped_symbol_name; + new_symbol.pretty_name = value; + new_symbol.mode = ID_java; + new_symbol.is_type = false; + new_symbol.is_lvalue = true; + new_symbol.is_static_lifetime = true; + + namespacet ns(symbol_table); + + // Regardless of string refinement setting, at least initialize + // the literal with @clsid = String and @lock = false: + symbol_typet jlo_symbol("java::java.lang.Object"); + const auto &jlo_struct = to_struct_type(ns.follow(jlo_symbol)); + struct_exprt jlo_init(jlo_symbol); + const auto &jls_struct = to_struct_type(ns.follow(string_type)); + java_root_class_init(jlo_init, jlo_struct, false, "java::java.lang.String"); + + // If string refinement *is* around, populate the actual + // contents as well: + if(string_refinement_enabled) + { + struct_exprt literal_init(new_symbol.type); + literal_init.operands().resize(jls_struct.components().size()); + const std::size_t jlo_nb = jls_struct.component_number("@java.lang.Object"); + literal_init.operands()[jlo_nb] = jlo_init; + + const std::size_t length_nb = jls_struct.component_number("length"); + const typet &length_type = jls_struct.components()[length_nb].type(); + const exprt length = from_integer(id2string(value).size(), length_type); + literal_init.operands()[length_nb] = length; + + // Initialize the string with a constant utf-16 array: + symbolt array_symbol; + array_symbol.name = escaped_symbol_name_with_prefix + "_constarray"; + array_symbol.base_name = escaped_symbol_name + "_constarray"; + array_symbol.pretty_name = value; + array_symbol.mode = ID_java; + array_symbol.is_type = false; + array_symbol.is_lvalue = true; + // These are basically const global data: + array_symbol.is_static_lifetime = true; + array_symbol.is_state_var = true; + array_symbol.value = + utf16_to_array(utf8_to_utf16_little_endian(id2string(value))); + array_symbol.type = array_symbol.value.type(); + + if(symbol_table.add(array_symbol)) + throw "failed to add constarray symbol to symbol table"; + + const symbol_exprt array_expr = array_symbol.symbol_expr(); + const address_of_exprt array_pointer( + index_exprt(array_expr, from_integer(0, java_int_type()))); + + const std::size_t data_nb = jls_struct.component_number("data"); + literal_init.operands()[data_nb] = array_pointer; + + // Associate array with pointer + symbolt return_symbol; + return_symbol.name = escaped_symbol_name_with_prefix + "_return_value"; + return_symbol.base_name = escaped_symbol_name + "_return_value"; + return_symbol.pretty_name = + escaped_symbol_name.length() > 10 + ? escaped_symbol_name.substr(0, 10) + "..._return_value" + : escaped_symbol_name + "_return_value"; + return_symbol.mode = ID_java; + return_symbol.is_type = false; + return_symbol.is_lvalue = true; + return_symbol.is_static_lifetime = true; + return_symbol.is_state_var = true; + return_symbol.value = make_function_application( + ID_cprover_associate_array_to_pointer_func, + {array_symbol.value, array_pointer}, + java_int_type(), + symbol_table); + return_symbol.type = return_symbol.value.type(); + if(symbol_table.add(return_symbol)) + throw "failed to add return symbol to symbol table"; + new_symbol.value = literal_init; + } + else if(jls_struct.components().size()>=1 && + jls_struct.components()[0].get_name()=="@java.lang.Object") + { + // Case where something defined java.lang.String, so it has + // a proper base class (always java.lang.Object in practical + // JDKs seen so far) + struct_exprt literal_init(new_symbol.type); + literal_init.move_to_operands(jlo_init); + for(const auto &comp : jls_struct.components()) + { + if(comp.get_name()=="@java.lang.Object") + continue; + // Other members of JDK's java.lang.String we don't understand + // without string-refinement. Just zero-init them; consider using + // test-gen-like nondet object trees instead. + literal_init.copy_to_operands( + zero_initializer(comp.type(), string_expr.source_location(), ns)); + } + new_symbol.value = literal_init; + } + else if(jls_struct.get_bool(ID_incomplete_class)) + { + // Case where java.lang.String was stubbed, and so directly defines + // @class_identifier and @lock: + new_symbol.value = jlo_init; + } + + bool add_failed = symbol_table.add(new_symbol); + INVARIANT( + !add_failed, + "string literal symbol was already checked not to be " + "in the symbol table, so adding it should succeed"); + + return new_symbol.symbol_expr(); +} diff --git a/src/java_bytecode/java_string_literals.h b/src/java_bytecode/java_string_literals.h new file mode 100644 index 00000000000..1ec8a023557 --- /dev/null +++ b/src/java_bytecode/java_string_literals.h @@ -0,0 +1,20 @@ +/*******************************************************************\ + +Module: Java string literal processing + +Author: Chris Smowton, chris.smowton@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_JAVA_BYTECODE_JAVA_STRING_LITERALS_H +#define CPROVER_JAVA_BYTECODE_JAVA_STRING_LITERALS_H + +#include +#include + +symbol_exprt get_or_create_string_literal_symbol( + const exprt &string_expr, + symbol_table_baset &symbol_table, + bool string_refinement_enabled); + +#endif From 8a2795030693d3354b60e33984bf124808de8895 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 29 Jan 2018 14:04:12 +0000 Subject: [PATCH 3/3] Java frontend: create String an Class literals earlier This creates String and Class literals ahead of running java_bytecode_convert_method instead of creating them in an after-the-fact fixup in typecheck_expr as previously. These now exist before method conversion begins and __CPROVER_initialize is created, which means the String literals are correctly initialised even when using incremental method conversion. This can be a little wasteful if they are only referred to from methods that would never truly be invoked, but preliminary experiments suggest that creating ~10000 Strings takes under a second, and that Java lazy loading can quickly delete ones that are not in fact used. Java CI lazy methods is augmented a little to notice that if we need global A, and global A is initialised with a reference to global B, then we need global B as well. We appear to have got away wtih this before because the inter-global references were introduced after CI lazy methods finished loading methods. --- .../java_bytecode_convert_method.cpp | 39 +----- src/java_bytecode/java_bytecode_language.cpp | 130 ++++++++++++++++++ src/java_bytecode/java_bytecode_typecheck.h | 1 - .../java_bytecode_typecheck_expr.cpp | 15 +- src/java_bytecode/java_string_literals.cpp | 11 +- 5 files changed, 150 insertions(+), 46 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 7b2ad658a09..256f1131bb5 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -1712,41 +1712,12 @@ codet java_bytecode_convert_methodt::convert_instructions( { assert(op.empty() && results.size()==1); - // 1) Pushing a String causes a reference to a java.lang.String object - // to be constructed and pushed onto the operand stack. + INVARIANT( + arg0.id() != ID_java_string_literal && arg0.id() != ID_type, + "String and Class literals should have been lowered in " + "generate_constant_global_variables"); - // 2) Pushing an int or a float causes a primitive value to be pushed - // onto the stack. - - // 3) Pushing a Class constant causes a reference to a java.lang.Class - // to be pushed onto the operand stack - - if(arg0.id()==ID_java_string_literal) - { - // these need to be references to java.lang.String - results[0]=arg0; - symbol_typet string_type("java::java.lang.String"); - results[0].type()=java_reference_type(string_type); - } - else if(arg0.id()==ID_type) - { - irep_idt class_id=arg0.type().get(ID_identifier); - symbol_typet java_lang_Class("java::java.lang.Class"); - symbol_exprt symbol_expr( - id2string(class_id)+"@class_model", - java_lang_Class); - address_of_exprt address_of_expr(symbol_expr); - results[0]=address_of_expr; - } - else if(arg0.id()==ID_constant) - { - results[0]=arg0; - } - else - { - error() << "unexpected ldc argument" << eom; - throw 0; - } + results[0] = arg0; } else if(statement=="goto" || statement=="goto_w") { diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 2fb13dc36e2..355e194da2f 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -30,6 +30,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_entry_point.h" #include "java_bytecode_parser.h" #include "java_class_loader.h" +#include "java_string_literals.h" #include "java_utils.h" #include #include @@ -256,6 +257,118 @@ static void infer_opaque_type_fields( } } +/// Create if necessary, then return the constant global java.lang.Class symbol +/// for a given class id +/// \param class_id: class identifier +/// \param symbol_table: global symbol table; a symbol may be added +/// \return java.lang.Class typed symbol expression +static symbol_exprt get_or_create_class_literal_symbol( + const irep_idt &class_id, symbol_tablet &symbol_table) +{ + symbol_typet java_lang_Class("java::java.lang.Class"); + symbol_exprt symbol_expr( + id2string(class_id)+"@class_model", + java_lang_Class); + if(!symbol_table.has_symbol(symbol_expr.get_identifier())) + { + symbolt new_class_symbol; + new_class_symbol.name = symbol_expr.get_identifier(); + new_class_symbol.type = symbol_expr.type(); + INVARIANT( + has_prefix(id2string(new_class_symbol.name), "java::"), + "class identifier should have 'java::' prefix"); + new_class_symbol.base_name = + id2string(new_class_symbol.name).substr(6); + new_class_symbol.mode = ID_java; + new_class_symbol.is_lvalue = true; + new_class_symbol.is_state_var = true; + new_class_symbol.is_static_lifetime = true; + symbol_table.add(new_class_symbol); + } + + return symbol_expr; +} + +/// Get result of a Java load-constant (ldc) instruction. +/// Possible cases: +/// 1) Pushing a String causes a reference to a java.lang.String object +/// to be constructed and pushed onto the operand stack. +/// 2) Pushing an int or a float causes a primitive value to be pushed +/// onto the stack. +/// 3) Pushing a Class constant causes a reference to a java.lang.Class +/// to be pushed onto the operand stack +/// \param ldc_arg0: raw operand to the ldc opcode +/// \param symbol_table: global symbol table. If the argument `ldc_arg0` is a +/// String or Class constant then a new constant global may be added. +/// \param string_refinement_enabled: true if --refine-strings is enabled, which +/// influences how String literals are structured. +/// \return ldc result +static exprt get_ldc_result( + const exprt &ldc_arg0, + symbol_tablet &symbol_table, + bool string_refinement_enabled) +{ + if(ldc_arg0.id() == ID_type) + { + const irep_idt &class_id = ldc_arg0.type().get(ID_identifier); + return + address_of_exprt( + get_or_create_class_literal_symbol(class_id, symbol_table)); + } + else if(ldc_arg0.id() == ID_java_string_literal) + { + return + address_of_exprt( + get_or_create_string_literal_symbol( + ldc_arg0, symbol_table, string_refinement_enabled)); + } + else + { + INVARIANT( + ldc_arg0.id() == ID_constant, + "ldc argument should be constant, string literal or class literal"); + return ldc_arg0; + } +} + +/// Creates global variables for constants mentioned in a given method. These +/// are either string literals, or class literals (the java.lang.Class instance +/// returned by `(some_reference_typed_expression).class`). The method parse +/// tree is rewritten to directly reference these globals. +/// \param parse_tree: parse tree to search for constant global references +/// \param symbol_table: global symbol table, to which constant globals will be +/// added. +/// \param string_refinement_enabled: true if `--refine-stings` is active, +/// which changes how string literals are structured. +static void generate_constant_global_variables( + java_bytecode_parse_treet &parse_tree, + symbol_tablet &symbol_table, + bool string_refinement_enabled) +{ + for(auto &method : parse_tree.parsed_class.methods) + { + for(auto &instruction : method.instructions) + { + // ldc* instructions are Java bytecode "load constant" ops, which can + // retrieve a numeric constant, String literal, or Class literal. + if(instruction.statement == "ldc" || + instruction.statement == "ldc2" || + instruction.statement == "ldc_w" || + instruction.statement == "ldc2_w") + { + INVARIANT( + instruction.args.size() != 0, + "ldc instructions should have an argument"); + instruction.args[0] = + get_ldc_result( + instruction.args[0], + symbol_table, + string_refinement_enabled); + } + } + } +} + bool java_bytecode_languaget::typecheck( symbol_tablet &symbol_table, const std::string &module) @@ -317,6 +430,23 @@ bool java_bytecode_languaget::typecheck( infer_opaque_type_fields(c.second, symbol_table); } + // Create global variables for constants (String and Class literals) up front. + // This means that when running with lazy loading, we will be aware of these + // literal globals' existence when __CPROVER_initialize is generated in + // `generate_support_functions`. + const std::size_t before_constant_globals_size = symbol_table.symbols.size(); + for(auto &c : java_class_loader.class_map) + { + generate_constant_global_variables( + c.second, + symbol_table, + string_refinement_enabled); + } + status() << "Java: added " + << (symbol_table.symbols.size() - before_constant_globals_size) + << " String or Class constant symbols" + << messaget::eom; + // Now incrementally elaborate methods // that are reachable from this entry point. if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) diff --git a/src/java_bytecode/java_bytecode_typecheck.h b/src/java_bytecode/java_bytecode_typecheck.h index 29175fe875c..9ac5611539e 100644 --- a/src/java_bytecode/java_bytecode_typecheck.h +++ b/src/java_bytecode/java_bytecode_typecheck.h @@ -67,7 +67,6 @@ class java_bytecode_typecheckt:public typecheckt void typecheck_code(codet &); void typecheck_type(typet &); void typecheck_expr_symbol(symbol_exprt &); - void typecheck_expr_java_string_literal(exprt &); void typecheck_expr_member(member_exprt &); void typecheck_expr_java_new(side_effect_exprt &); void typecheck_expr_java_new_array(side_effect_exprt &); diff --git a/src/java_bytecode/java_bytecode_typecheck_expr.cpp b/src/java_bytecode/java_bytecode_typecheck_expr.cpp index 8f8864660e1..6fa16827fe9 100644 --- a/src/java_bytecode/java_bytecode_typecheck_expr.cpp +++ b/src/java_bytecode/java_bytecode_typecheck_expr.cpp @@ -21,7 +21,6 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_utils.h" #include "java_root_class.h" #include "java_string_library_preprocess.h" -#include "java_string_literals.h" void java_bytecode_typecheckt::typecheck_expr(exprt &expr) { @@ -39,6 +38,11 @@ void java_bytecode_typecheckt::typecheck_expr(exprt &expr) Forall_operands(it, expr) typecheck_expr(*it); + INVARIANT( + expr.id() != ID_java_string_literal, + "String literals should have been converted to constant globals " + "before typecheck_expr"); + if(expr.id()==ID_symbol) typecheck_expr_symbol(to_symbol_expr(expr)); else if(expr.id()==ID_side_effect) @@ -49,8 +53,6 @@ void java_bytecode_typecheckt::typecheck_expr(exprt &expr) else if(statement==ID_java_new_array) typecheck_expr_java_new_array(to_side_effect_expr(expr)); } - else if(expr.id()==ID_java_string_literal) - typecheck_expr_java_string_literal(expr); else if(expr.id()==ID_member) typecheck_expr_member(to_member_expr(expr)); } @@ -70,13 +72,6 @@ void java_bytecode_typecheckt::typecheck_expr_java_new_array( typecheck_type(type); } -void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) -{ - expr = address_of_exprt( - get_or_create_string_literal_symbol( - expr, symbol_table, string_refinement_enabled)); -} - void java_bytecode_typecheckt::typecheck_expr_symbol(symbol_exprt &expr) { irep_idt identifier=expr.get_identifier(); diff --git a/src/java_bytecode/java_string_literals.cpp b/src/java_bytecode/java_string_literals.cpp index d5971f9e3e2..5ec35ac0d6e 100644 --- a/src/java_bytecode/java_string_literals.cpp +++ b/src/java_bytecode/java_string_literals.cpp @@ -162,7 +162,16 @@ symbol_exprt get_or_create_string_literal_symbol( return_symbol.type = return_symbol.value.type(); if(symbol_table.add(return_symbol)) throw "failed to add return symbol to symbol table"; - new_symbol.value = literal_init; + + // Initialise the literal structure with + // (ABC_return_value, { ..., .length = N, .data = &ABC_constarray }), + // using a C-style comma expression to indicate that we need the + // _return_value global for its side-effects. + exprt init_comma_expr(ID_comma); + init_comma_expr.type() = literal_init.type(); + init_comma_expr.copy_to_operands(return_symbol.symbol_expr()); + init_comma_expr.move_to_operands(literal_init); + new_symbol.value = init_comma_expr; } else if(jls_struct.components().size()>=1 && jls_struct.components()[0].get_name()=="@java.lang.Object")