From 7884958e4bf7c339f51a511ff1bfb7927b0a13c1 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 11 Jan 2018 16:00:23 +0000 Subject: [PATCH 1/3] Remove returns: support running per-function This converts functions from conventional to via-global-variable return style on a function-by-function basis. It results in some complication, as the globals are now seen by the language's final pass, meaning it must ignore them for initialisation purposes, and we must cope with functions not having their "true" type while other functions are still being loaded. One test in cbmc-java failed because it was written in C, which we don't adapt to support incremental loading yet, so it is moved to the cbmc test directory, where it can still achieve its goal of checking whether uncaught_exceptions_analysist can tolerate CPROVER_assert and similar builtins, since cbmc also runs the remove_exceptions pass. --- .../uncaught_exceptions_analysis1}/main.c | 0 .../uncaught_exceptions_analysis1}/test.desc | 0 src/goto-programs/remove_returns.cpp | 239 ++++++++++++------ src/goto-programs/remove_returns.h | 2 + .../java_bytecode_convert_method.cpp | 13 +- .../java_bytecode_convert_method.h | 3 +- .../java_bytecode_convert_method_class.h | 8 +- src/java_bytecode/java_bytecode_language.cpp | 3 +- src/java_bytecode/java_bytecode_language.h | 5 + src/java_bytecode/java_entry_point.cpp | 7 +- 10 files changed, 195 insertions(+), 85 deletions(-) rename regression/{cbmc-java/exceptions25 => cbmc/uncaught_exceptions_analysis1}/main.c (100%) rename regression/{cbmc-java/exceptions25 => cbmc/uncaught_exceptions_analysis1}/test.desc (100%) diff --git a/regression/cbmc-java/exceptions25/main.c b/regression/cbmc/uncaught_exceptions_analysis1/main.c similarity index 100% rename from regression/cbmc-java/exceptions25/main.c rename to regression/cbmc/uncaught_exceptions_analysis1/main.c diff --git a/regression/cbmc-java/exceptions25/test.desc b/regression/cbmc/uncaught_exceptions_analysis1/test.desc similarity index 100% rename from regression/cbmc-java/exceptions25/test.desc rename to regression/cbmc/uncaught_exceptions_analysis1/test.desc diff --git a/src/goto-programs/remove_returns.cpp b/src/goto-programs/remove_returns.cpp index 2ecc8c75779..d4eb6dd9ea6 100644 --- a/src/goto-programs/remove_returns.cpp +++ b/src/goto-programs/remove_returns.cpp @@ -27,6 +27,9 @@ class remove_returnst void operator()( goto_functionst &goto_functions); + void operator()( + goto_model_functiont &model_function); + void restore( goto_functionst &goto_functions); @@ -34,10 +37,13 @@ class remove_returnst symbol_tablet &symbol_table; void replace_returns( - goto_functionst::function_mapt::iterator f_it); + const irep_idt &function_id, + goto_functionst::goto_functiont &function); + + typedef std::function function_is_stubt; void do_function_calls( - goto_functionst &goto_functions, + function_is_stubt function_is_stub, goto_programt &goto_program); bool restore_returns( @@ -46,74 +52,90 @@ class remove_returnst void undo_function_calls( goto_functionst &goto_functions, goto_programt &goto_program); + + symbol_exprt get_or_create_return_value_symbol( + const irep_idt &function_id); }; +symbol_exprt remove_returnst::get_or_create_return_value_symbol( + const irep_idt &function_id) +{ + const irep_idt symbol_name = + id2string(function_id) + RETURN_VALUE_SUFFIX; + const symbolt *existing_symbol = symbol_table.lookup(symbol_name); + if(existing_symbol != nullptr) + return existing_symbol->symbol_expr(); + + const symbolt &function_symbol = symbol_table.lookup_ref(function_id); + const typet &return_type = to_code_type(function_symbol.type).return_type(); + + if(return_type == empty_typet()) + return symbol_exprt(); + + auxiliary_symbolt new_symbol; + new_symbol.is_static_lifetime = true; + new_symbol.module = function_symbol.module; + new_symbol.base_name = + id2string(function_symbol.base_name) + RETURN_VALUE_SUFFIX; + new_symbol.name = symbol_name; + new_symbol.mode = function_symbol.mode; + // If we're creating this for the first time, the target function cannot have + // been remove_return'd yet, so this will still be the "true" return type: + new_symbol.type = return_type; + + symbol_table.add(new_symbol); + return new_symbol.symbol_expr(); +} + /// turns 'return x' into an assignment to fkt#return_value +/// \param function_id: name of the function to transform +/// \param function: function to transform void remove_returnst::replace_returns( - goto_functionst::function_mapt::iterator f_it) + const irep_idt &function_id, + goto_functionst::goto_functiont &function) { - typet return_type=f_it->second.type.return_type(); - - const irep_idt function_id=f_it->first; + typet return_type=function.type.return_type(); // returns something but void? - bool has_return_value=return_type!=empty_typet(); + if(return_type == empty_typet()) + return; - if(has_return_value) - { - // look up the function symbol - symbolt &function_symbol=*symbol_table.get_writeable(function_id); - - // make the return type 'void' - f_it->second.type.return_type()=empty_typet(); - function_symbol.type=f_it->second.type; - - // add return_value symbol to symbol_table - auxiliary_symbolt new_symbol; - new_symbol.is_static_lifetime=true; - new_symbol.module=function_symbol.module; - new_symbol.base_name= - id2string(function_symbol.base_name)+RETURN_VALUE_SUFFIX; - new_symbol.name=id2string(function_symbol.name)+RETURN_VALUE_SUFFIX; - new_symbol.mode=function_symbol.mode; - new_symbol.type=return_type; - - symbol_table.add(new_symbol); - } + // add return_value symbol to symbol_table, if not already created: + symbol_exprt return_symbol = get_or_create_return_value_symbol(function_id); - goto_programt &goto_program=f_it->second.body; + // look up the function symbol + symbolt &function_symbol=*symbol_table.get_writeable(function_id); - if(goto_program.empty()) - return; + // make the return type 'void' + function.type.return_type()=empty_typet(); + function_symbol.type=function.type; - if(has_return_value) + goto_programt &goto_program=function.body; + + Forall_goto_program_instructions(i_it, goto_program) { - Forall_goto_program_instructions(i_it, goto_program) + if(i_it->is_return()) { - if(i_it->is_return()) - { - INVARIANT( - i_it->code.operands().size()==1, - "return instructions should have one operand"); - - // replace "return x;" by "fkt#return_value=x;" - symbol_exprt lhs_expr; - lhs_expr.set_identifier(id2string(function_id)+RETURN_VALUE_SUFFIX); - lhs_expr.type()=return_type; + INVARIANT( + i_it->code.operands().size()==1, + "return instructions should have one operand"); - code_assignt assignment(lhs_expr, i_it->code.op0()); + // replace "return x;" by "fkt#return_value=x;" + code_assignt assignment(return_symbol, i_it->code.op0()); - // now turn the `return' into `assignment' - i_it->type=ASSIGN; - i_it->code=assignment; - } + // now turn the `return' into `assignment' + i_it->type=ASSIGN; + i_it->code=assignment; } } } /// turns x=f(...) into f(...); lhs=f#return_value; +/// \param function_is_stub: function (irep_idt -> bool) that determines whether +/// a given function ID is a stub +/// \param goto_program: program to transform void remove_returnst::do_function_calls( - goto_functionst &goto_functions, + function_is_stubt function_is_stub, goto_programt &goto_program) { Forall_goto_program_instructions(i_it, goto_program) @@ -122,29 +144,40 @@ void remove_returnst::do_function_calls( { code_function_callt &function_call=to_code_function_call(i_it->code); - code_typet old_type=to_code_type(function_call.function().type()); + INVARIANT( + function_call.function().id()==ID_symbol, + "indirect function calls should have been removed prior to running " + "remove-returns"); + + const irep_idt function_id= + to_symbol_expr(function_call.function()).get_identifier(); + + symbol_exprt return_value; + typet old_return_type; + bool is_stub = function_is_stub(function_id); + + if(is_stub) + { + old_return_type = + to_code_type(function_call.function().type()).return_type(); + } + else + { + // The callee may or may not already have been transformed by this pass, + // so its symbol-table entry may already have void return type. + // To simplify matters, create its return-value global now (if not + // already done), and use that to determine its true return type. + return_value = get_or_create_return_value_symbol(function_id); + if(return_value == symbol_exprt()) // really void-typed? + continue; + old_return_type = return_value.type(); + } // Do we return anything? - if(old_type.return_type()!=empty_typet()) + if(old_return_type!=empty_typet()) { // replace "lhs=f(...)" by // "f(...); lhs=f#return_value; DEAD f#return_value;" - INVARIANT( - function_call.function().id()==ID_symbol, - "indirect function calls should have been removed prior to running " - "remove-returns"); - - const irep_idt function_id= - to_symbol_expr(function_call.function()).get_identifier(); - - // see if we have a body - goto_functionst::function_mapt::const_iterator - f_it=goto_functions.function_map.find(function_id); - - if(f_it==goto_functions.function_map.end()) - throw - "failed to find function `"+id2string(function_id)+ - "' in function map"; // fix the type to_code_type(function_call.function().type()).return_type()= @@ -154,18 +187,10 @@ void remove_returnst::do_function_calls( { exprt rhs; - if(f_it->second.body_available()) - { - symbol_exprt return_value; - return_value.type()=function_call.lhs().type(); - return_value.set_identifier( - id2string(function_id)+RETURN_VALUE_SUFFIX); + if(!is_stub) rhs=return_value; - } else - { rhs=side_effect_expr_nondett(function_call.lhs().type()); - } goto_programt::targett t_a=goto_program.insert_after(i_it); t_a->make_assignment(); @@ -176,7 +201,7 @@ void remove_returnst::do_function_calls( // fry the previous assignment function_call.lhs().make_nil(); - if(f_it->second.body_available()) + if(!is_stub) { goto_programt::targett t_d=goto_program.insert_after(t_a); t_d->make_dead(); @@ -194,11 +219,47 @@ void remove_returnst::operator()(goto_functionst &goto_functions) { Forall_goto_functions(it, goto_functions) { - replace_returns(it); - do_function_calls(goto_functions, it->second.body); + // NOLINTNEXTLINE + auto function_is_stub = [&goto_functions](const irep_idt &function_id) { + auto findit = goto_functions.function_map.find(function_id); + INVARIANT( + findit != goto_functions.function_map.end(), + "called function should have some entry in the function map"); + return !findit->second.body_available(); + }; + + replace_returns(it->first, it->second); + do_function_calls(function_is_stub, it->second.body); } } +void remove_returnst::operator()(goto_model_functiont &model_function) +{ + symbol_tablet &symbol_table = model_function.get_symbol_table(); + goto_functionst::goto_functiont &goto_function = + model_function.get_goto_function(); + + // If this is a stub it doesn't have a corresponding #return_value, + // not any return instructions to alter: + if(goto_function.body.empty()) + return; + + // NOLINTNEXTLINE + auto function_is_stub = [&symbol_table](const irep_idt &function_id) { + const symbolt &function_symbol = symbol_table.lookup_ref(function_id); + INVARIANT( + function_symbol.mode == ID_java, + "only Java currently annotates its stubs in the symbol table; to use " + "remove-returns per-function with another language, annotate its stub " + "symbol table entries with ID_C_incomplete and amend this invariant"); + return function_symbol.type.get_bool(ID_C_incomplete); + }; + + replace_returns( + goto_programt::get_function_id(goto_function.body), goto_function); + do_function_calls(function_is_stub, goto_function.body); +} + /// removes returns void remove_returns( symbol_tablet &symbol_table, @@ -208,6 +269,21 @@ void remove_returns( rr(goto_functions); } +/// Removes returns from a single function. Only usable with Java programs at +/// the moment; to use it with other languages, they must annotate their stub +/// functions with ID_C_incomplete as currently done in +/// java_bytecode_convert_method.cpp. +/// +/// This will generate \#return_value variables, if not already present, for +/// both the function being altered *and* any callees. +/// \param goto_model_function: function to transform +void remove_returns( + goto_model_functiont &goto_model_function) +{ + remove_returnst rr(goto_model_function.get_symbol_table()); + rr(goto_model_function); +} + /// removes returns void remove_returns(goto_modelt &goto_model) { @@ -215,6 +291,11 @@ void remove_returns(goto_modelt &goto_model) rr(goto_model.goto_functions); } +/// Get code type of a function that has had remove_returns run upon it +/// \param symbol_table: global symbol table +/// \param function_id: function to get the type of +/// \return the function's type with its `return_type()` restored to its +/// original value if a \#return_value variable exists, or nil otherwise code_typet original_return_type( const symbol_tablet &symbol_table, const irep_idt &function_id) diff --git a/src/goto-programs/remove_returns.h b/src/goto-programs/remove_returns.h index 04b113cff18..578f97adf7a 100644 --- a/src/goto-programs/remove_returns.h +++ b/src/goto-programs/remove_returns.h @@ -24,6 +24,8 @@ Date: September 2009 void remove_returns(symbol_tablet &, goto_functionst &); +void remove_returns(goto_model_functiont &); + void remove_returns(goto_modelt &); // reverse the above operations diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index fb9b18bd871..3fe1facb71d 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -1519,6 +1519,13 @@ codet java_bytecode_convert_methodt::convert_instructions( id2string(symbol.base_name)+"()"; symbol.type=arg0.type(); symbol.type.set(ID_access, ID_public); + // If string refinement can provide a body this will become a "real" + // function in due time; otherwise it will remain a stub. + if((!driver_program_provides_stubs) && + (!string_preprocess.implements_function(id))) + { + symbol.type.set(ID_C_incomplete, true); + } symbol.value.make_nil(); symbol.mode=ID_java; assign_parameter_names( @@ -2938,7 +2945,8 @@ void java_bytecode_convert_method( message_handlert &message_handler, size_t max_array_length, optionalt needed_lazy_methods, - java_string_library_preprocesst &string_preprocess) + java_string_library_preprocesst &string_preprocess, + bool driver_program_provides_stubs) { static const std::unordered_set methods_to_ignore { @@ -2969,7 +2977,8 @@ void java_bytecode_convert_method( message_handler, max_array_length, needed_lazy_methods, - string_preprocess); + string_preprocess, + driver_program_provides_stubs); java_bytecode_convert_method(class_symbol, method); } diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index b27fd1cf898..67ee0eb878c 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -34,7 +34,8 @@ void java_bytecode_convert_method( message_handlert &message_handler, size_t max_array_length, optionalt needed_lazy_methods, - java_string_library_preprocesst &string_preprocess); + java_string_library_preprocesst &string_preprocess, + bool driver_program_provides_stubs); void java_bytecode_convert_method_lazy( const symbolt &class_symbol, diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 0d1c34d82db..e1032e10b80 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -35,13 +35,15 @@ class java_bytecode_convert_methodt:public messaget message_handlert &_message_handler, size_t _max_array_length, optionalt needed_lazy_methods, - java_string_library_preprocesst &_string_preprocess) + java_string_library_preprocesst &_string_preprocess, + bool driver_program_provides_stubs) : messaget(_message_handler), symbol_table(symbol_table), max_array_length(_max_array_length), needed_lazy_methods(std::move(needed_lazy_methods)), string_preprocess(_string_preprocess), slots_for_parameters(0), + driver_program_provides_stubs(driver_program_provides_stubs), method_has_this(false) { } @@ -81,6 +83,10 @@ class java_bytecode_convert_methodt:public messaget /// Initialized in `convert`. unsigned slots_for_parameters; + /// True if the driver program will provide bodies for all stub functions; + /// will be used to annotate stubs created upon discovery of a callsite. + bool driver_program_provides_stubs; + public: struct holet { diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 73c23711f03..01913fc5828 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -480,7 +480,8 @@ bool java_bytecode_languaget::convert_single_method( get_message_handler(), max_user_array_length, std::move(needed_lazy_methods), - string_preprocess); + string_preprocess, + driver_program_provides_stubs()); return false; } diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 905c56afb6e..4bac49c27b5 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -188,6 +188,11 @@ class java_bytecode_languaget:public languaget std::vector java_load_classes; private: + /// Should be overridden by driver programs that will provide stub methods, + /// i.e. they pledge to replace any function that cannot be elaborated here + /// with some function body. + virtual bool driver_program_provides_stubs() { return false; } + const std::unique_ptr pointer_type_selector; }; diff --git a/src/java_bytecode/java_entry_point.cpp b/src/java_bytecode/java_entry_point.cpp index eab1a397286..255d97742e6 100644 --- a/src/java_bytecode/java_entry_point.cpp +++ b/src/java_bytecode/java_entry_point.cpp @@ -31,6 +31,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include "java_object_factory.h" #include "java_types.h" @@ -73,7 +74,11 @@ static bool should_init_symbol(const symbolt &sym) sym.is_state_var && sym.is_static_lifetime && sym.mode==ID_java) - return true; + { + // Consider some sort of annotation indicating a global variable that + // doesn't require initialisation? + return !has_suffix(id2string(sym.name), RETURN_VALUE_SUFFIX); + } return is_java_string_literal_id(sym.name); } From 032b29988f1a91b1af8610cfdf4529fdebc1d5fa Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 12 Jan 2018 14:13:13 +0000 Subject: [PATCH 2/3] JBMC: remove return values on a per-function basis --- src/jbmc/jbmc_parse_options.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/jbmc/jbmc_parse_options.cpp b/src/jbmc/jbmc_parse_options.cpp index 8a18c670b45..61e9e31b92d 100644 --- a/src/jbmc/jbmc_parse_options.cpp +++ b/src/jbmc/jbmc_parse_options.cpp @@ -649,6 +649,8 @@ void jbmc_parse_optionst::process_goto_function( remove_instanceof(goto_function, symbol_table); // Java virtual functions -> explicit dispatch tables: remove_virtual_functions(function); + // remove returns + remove_returns(function); } catch(const char *e) @@ -686,9 +688,6 @@ bool jbmc_parse_optionst::process_goto_functions( // instrument library preconditions instrument_preconditions(goto_model); - // remove returns, gcc vectors, complex - remove_returns(goto_model); - // Similar removal of java nondet statements: // TODO Should really get this from java_bytecode_language somehow, but we // don't have an instance of that here. From 24743d10d171fd408abe2a5487b0515aad2b7704 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 15 Jan 2018 14:27:36 +0000 Subject: [PATCH 3/3] JBMC: run replace-Java-nondet on function-by-function basis --- src/goto-programs/replace_java_nondet.cpp | 10 ++++++++++ src/goto-programs/replace_java_nondet.h | 6 ++++++ src/jbmc/jbmc_parse_options.cpp | 4 ++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/goto-programs/replace_java_nondet.cpp b/src/goto-programs/replace_java_nondet.cpp index b030cbaa312..431acc0efb2 100644 --- a/src/goto-programs/replace_java_nondet.cpp +++ b/src/goto-programs/replace_java_nondet.cpp @@ -235,6 +235,16 @@ static void replace_java_nondet(goto_programt &goto_program) } } +void replace_java_nondet(goto_model_functiont &function) +{ + goto_programt &program = function.get_goto_function().body; + replace_java_nondet(program); + + function.compute_location_numbers(); + + remove_skip(program); +} + void replace_java_nondet(goto_functionst &goto_functions) { for(auto &goto_program : goto_functions.function_map) diff --git a/src/goto-programs/replace_java_nondet.h b/src/goto-programs/replace_java_nondet.h index ac8f8cfb005..3653908588d 100644 --- a/src/goto-programs/replace_java_nondet.h +++ b/src/goto-programs/replace_java_nondet.h @@ -14,6 +14,7 @@ Author: Reuben Thomas, reuben.thomas@diffblue.com class goto_modelt; class goto_functionst; +class goto_model_functiont; /// Replace calls to nondet library functions with an internal nondet /// representation. @@ -22,4 +23,9 @@ void replace_java_nondet(goto_modelt &); void replace_java_nondet(goto_functionst &); +/// Replace calls to nondet library functions with an internal nondet +/// representation in a single function. +/// \param function: The goto program to modify. +void replace_java_nondet(goto_model_functiont &function); + #endif diff --git a/src/jbmc/jbmc_parse_options.cpp b/src/jbmc/jbmc_parse_options.cpp index 61e9e31b92d..fbb7b5251cd 100644 --- a/src/jbmc/jbmc_parse_options.cpp +++ b/src/jbmc/jbmc_parse_options.cpp @@ -651,6 +651,8 @@ void jbmc_parse_optionst::process_goto_function( remove_virtual_functions(function); // remove returns remove_returns(function); + + replace_java_nondet(function); } catch(const char *e) @@ -705,8 +707,6 @@ bool jbmc_parse_optionst::process_goto_functions( ? std::stoul(cmdline.get_value("java-max-input-tree-depth")) : MAX_NONDET_TREE_DEPTH; - replace_java_nondet(goto_model); - convert_nondet( goto_model, get_message_handler(),