From 06a220ace57c7266d2a215719c12db5a6528a527 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 8 Feb 2018 15:12:27 +0000 Subject: [PATCH] Reimplement remove-static-init-loops to avoid need to inspect GOTO program This was supplying synthetic --unwind parameters for each loop found in the GOTO program, but that required early access to the GOTO code, which will not be available under symex- driven lazy loading. Therefore we replace that pass with a handler mechanism that allows the driver program to provide its own callback to make unwinding decisions, and use it in JBMC to ensure that enumeration type static initialisers are unwound sufficiently. --- src/cbmc/bmc.h | 11 ++ src/cbmc/cbmc_parse_options.cpp | 1 - src/cbmc/symex_bmc.cpp | 95 +++++++++---- src/cbmc/symex_bmc.h | 46 +++++++ src/goto-programs/Makefile | 1 - .../remove_static_init_loops.cpp | 128 ------------------ src/goto-programs/remove_static_init_loops.h | 36 ----- src/java_bytecode/Makefile | 1 + .../java_bytecode_convert_class.cpp | 6 + .../java_enum_static_init_unwind_handler.cpp | 57 ++++++++ .../java_enum_static_init_unwind_handler.h | 25 ++++ src/jbmc/jbmc_parse_options.cpp | 25 +++- 12 files changed, 232 insertions(+), 200 deletions(-) delete mode 100644 src/goto-programs/remove_static_init_loops.cpp delete mode 100644 src/goto-programs/remove_static_init_loops.h create mode 100644 src/java_bytecode/java_enum_static_init_unwind_handler.cpp create mode 100644 src/java_bytecode/java_enum_static_init_unwind_handler.h diff --git a/src/cbmc/bmc.h b/src/cbmc/bmc.h index 40673724703..6a0f7297a18 100644 --- a/src/cbmc/bmc.h +++ b/src/cbmc/bmc.h @@ -71,6 +71,17 @@ class bmct:public safety_checkert return run(goto_functions); } + void add_loop_unwind_handler(symex_bmct::loop_unwind_handlert handler) + { + symex.add_loop_unwind_handler(handler); + } + + void add_unwind_recursion_handler( + symex_bmct::recursion_unwind_handlert handler) + { + symex.add_recursion_unwind_handler(handler); + } + protected: const optionst &options; symbol_tablet new_symbol_table; diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index 1b83f244e01..c6f0edfc9ee 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -45,7 +45,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include -#include #include #include #include diff --git a/src/cbmc/symex_bmc.cpp b/src/cbmc/symex_bmc.cpp index fdc34de4b1c..31bc6acbb19 100644 --- a/src/cbmc/symex_bmc.cpp +++ b/src/cbmc/symex_bmc.cpp @@ -110,27 +110,49 @@ bool symex_bmct::get_unwind( { const irep_idt id=goto_programt::loop_id(*source.pc); - // We use the most specific limit we have, - // and 'infinity' when we have none. - + tvt abort_unwind_decision; unsigned this_loop_limit=std::numeric_limits::max(); - loop_limitst &this_thread_limits= - thread_loop_limits[source.thread_nr]; + for(auto handler : loop_unwind_handlers) + { + abort_unwind_decision = + handler( + source.pc->function, + source.pc->loop_number, + unwind, + this_loop_limit); + if(abort_unwind_decision.is_known()) + break; + } - loop_limitst::const_iterator l_it=this_thread_limits.find(id); - if(l_it!=this_thread_limits.end()) - this_loop_limit=l_it->second; - else + // If no handler gave an opinion, use standard command-line --unwindset + // / --unwind options to decide: + if(abort_unwind_decision.is_unknown()) { - l_it=loop_limits.find(id); - if(l_it!=loop_limits.end()) + // We use the most specific limit we have, + // and 'infinity' when we have none. + + loop_limitst &this_thread_limits= + thread_loop_limits[source.thread_nr]; + + loop_limitst::const_iterator l_it=this_thread_limits.find(id); + if(l_it!=this_thread_limits.end()) this_loop_limit=l_it->second; - else if(max_unwind_is_set) - this_loop_limit=max_unwind; + else + { + l_it=loop_limits.find(id); + if(l_it!=loop_limits.end()) + this_loop_limit=l_it->second; + else if(max_unwind_is_set) + this_loop_limit=max_unwind; + } + + abort_unwind_decision = tvt(unwind >= this_loop_limit); } - bool abort=unwind>=this_loop_limit; + INVARIANT( + abort_unwind_decision.is_known(), "unwind decision should be taken by now"); + bool abort = abort_unwind_decision.is_true(); log.statistics() << (abort ? "Not unwinding" : "Unwinding") << " loop " << id << " iteration " << unwind; @@ -149,27 +171,44 @@ bool symex_bmct::get_unwind_recursion( const unsigned thread_nr, unsigned unwind) { - // We use the most specific limit we have, - // and 'infinity' when we have none. - + tvt abort_unwind_decision; unsigned this_loop_limit=std::numeric_limits::max(); - loop_limitst &this_thread_limits= - thread_loop_limits[thread_nr]; + for(auto handler : recursion_unwind_handlers) + { + abort_unwind_decision = handler(id, unwind, this_loop_limit); + if(abort_unwind_decision.is_known()) + break; + } - loop_limitst::const_iterator l_it=this_thread_limits.find(id); - if(l_it!=this_thread_limits.end()) - this_loop_limit=l_it->second; - else + // If no handler gave an opinion, use standard command-line --unwindset + // / --unwind options to decide: + if(abort_unwind_decision.is_unknown()) { - l_it=loop_limits.find(id); - if(l_it!=loop_limits.end()) + // We use the most specific limit we have, + // and 'infinity' when we have none. + + loop_limitst &this_thread_limits= + thread_loop_limits[thread_nr]; + + loop_limitst::const_iterator l_it=this_thread_limits.find(id); + if(l_it!=this_thread_limits.end()) this_loop_limit=l_it->second; - else if(max_unwind_is_set) - this_loop_limit=max_unwind; + else + { + l_it=loop_limits.find(id); + if(l_it!=loop_limits.end()) + this_loop_limit=l_it->second; + else if(max_unwind_is_set) + this_loop_limit=max_unwind; + } + + abort_unwind_decision = tvt(unwind>this_loop_limit); } - bool abort=unwind>this_loop_limit; + INVARIANT( + abort_unwind_decision.is_known(), "unwind decision should be taken by now"); + bool abort = abort_unwind_decision.is_true(); if(unwind>0 || abort) { diff --git a/src/cbmc/symex_bmc.h b/src/cbmc/symex_bmc.h index ba483840be2..fb43bca88ee 100644 --- a/src/cbmc/symex_bmc.h +++ b/src/cbmc/symex_bmc.h @@ -13,6 +13,7 @@ Author: Daniel Kroening, kroening@kroening.com #define CPROVER_CBMC_SYMEX_BMC_H #include +#include #include @@ -53,6 +54,43 @@ class symex_bmct: public goto_symext loop_limits[id]=limit; } + /// Loop unwind handlers take the function ID and loop number, the unwind + /// count so far, and an out-parameter specifying an advisory maximum, which + /// they may set. If set the advisory maximum is set it is *only* used to + /// print useful information for the user (e.g. "unwinding iteration N, max + /// M"), and is not enforced. They return true to halt unwinding, false to + /// authorise unwinding, or Unknown to indicate they have no opinion. + typedef + std::function + loop_unwind_handlert; + + /// Recursion unwind handlers take the function ID, the unwind count so far, + /// and an out-parameter specifying an advisory maximum, which they may set. + /// If set the advisory maximum is set it is *only* used to print useful + /// information for the user (e.g. "unwinding iteration N, max M"), + /// and is not enforced. They return true to halt unwinding, false to + /// authorise unwinding, or Unknown to indicate they have no opinion. + typedef std::function + recursion_unwind_handlert; + + /// Add a callback function that will be called to determine whether to unwind + /// loops. The first function added will get the first chance to answer, and + /// the first authoratitive (true or false) answer is final. + /// \param handler: new callback + void add_loop_unwind_handler(loop_unwind_handlert handler) + { + loop_unwind_handlers.push_back(handler); + } + + /// Add a callback function that will be called to determine whether to unwind + /// recursion. The first function added will get the first chance to answer, + /// and the first authoratitive (true or false) answer is final. + /// \param handler: new callback + void add_recursion_unwind_handler(recursion_unwind_handlert handler) + { + recursion_unwind_handlers.push_back(handler); + } + bool output_coverage_report( const goto_functionst &goto_functions, const std::string &path) const @@ -67,6 +105,8 @@ class symex_bmct: public goto_symext // 1) a global limit (max_unwind) // 2) a limit per loop, all threads // 3) a limit for a particular thread. + // 4) zero or more handler functions that can special-case particular + // functions or loops // We use the most specific of the above. unsigned max_unwind; @@ -78,6 +118,12 @@ class symex_bmct: public goto_symext typedef std::map thread_loop_limitst; thread_loop_limitst thread_loop_limits; + /// Callbacks that may provide an unwind/do-not-unwind decision for a loop + std::vector loop_unwind_handlers; + /// Callbacks that may provide an unwind/do-not-unwind decision for a + /// recursive call + std::vector recursion_unwind_handlers; + // // overloaded from goto_symext // diff --git a/src/goto-programs/Makefile b/src/goto-programs/Makefile index 38a75637776..e12c9e5f145 100644 --- a/src/goto-programs/Makefile +++ b/src/goto-programs/Makefile @@ -47,7 +47,6 @@ SRC = basic_blocks.cpp \ remove_instanceof.cpp \ remove_returns.cpp \ remove_skip.cpp \ - remove_static_init_loops.cpp \ remove_unreachable.cpp \ remove_unused_functions.cpp \ remove_vector.cpp \ diff --git a/src/goto-programs/remove_static_init_loops.cpp b/src/goto-programs/remove_static_init_loops.cpp deleted file mode 100644 index e9411beeb5b..00000000000 --- a/src/goto-programs/remove_static_init_loops.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/*******************************************************************\ - -Module: Unwind loops in static initializers - -Author: Daniel Kroening, kroening@kroening.com - -\*******************************************************************/ - -/// \file -/// Unwind loops in static initializers - -#include "remove_static_init_loops.h" -#include "goto_model.h" - -#include - -#include -#include - -class remove_static_init_loopst -{ -public: - explicit remove_static_init_loopst( - const symbol_tablet &_symbol_table): - symbol_table(_symbol_table) - {} - - void unwind_enum_static( - const goto_functionst &goto_functions, - optionst &options, - message_handlert &); -protected: - const symbol_tablet &symbol_table; -}; - -/// unwind static initialization loops of Java enums as far as the enum has -/// elements, thus flattening them completely -/// \par parameters: goto_functions and options -/// \return side effect is adding loops to unwindset -void remove_static_init_loopst::unwind_enum_static( - const goto_functionst &goto_functions, - optionst &options, - message_handlert &msg) -{ - size_t unwind_max=0; - messaget message; - message.set_message_handler(msg); - forall_goto_functions(f, goto_functions) - { - auto &p=f->second.body; - for(const auto &ins : p.instructions) - { - // is this a loop? - if(ins.is_backwards_goto()) - { - size_t loop_id=ins.loop_number; - const std::string java_clinit=":()V"; - const std::string &fname=id2string(ins.function); - size_t class_prefix_length=fname.find_last_of('.'); - // is the function symbol in the symbol table? - const auto maybe_symbol=symbol_table.lookup(ins.function); - if(!maybe_symbol) - { - message.warning() << "function `" << id2string(ins.function) - << "` is not in symbol table" << messaget::eom; - continue; - } - // is Java function and static init? - const symbolt &function_name=*maybe_symbol; - if(!(function_name.mode==ID_java && has_suffix(fname, java_clinit))) - continue; - assert( - class_prefix_length!=std::string::npos && - "could not identify class name"); - const std::string &classname=fname.substr(0, class_prefix_length); - const symbolt &class_symbol=*symbol_table.lookup(classname); - const class_typet &class_type=to_class_type(class_symbol.type); - size_t unwinds=class_type.get_size_t(ID_java_enum_static_unwind); - - unwind_max=std::max(unwind_max, unwinds); - if(unwinds>0) - { - const std::string &set=options.get_option("unwindset"); - std::string newset; - if(set!="") - newset=","; - newset+= - id2string(ins.function)+"."+std::to_string(loop_id)+":"+ - std::to_string(unwinds); - options.set_option("unwindset", set+newset); - } - } - } - } - const std::string &vla_length=options.get_option("java-max-vla-length"); - if(!vla_length.empty()) - { - size_t max_vla_length=safe_string2unsigned(vla_length); - if(max_vla_length due to insufficient max vla length"; - } -} - -/// this is the entry point for the removal of loops in static initialization -/// code of Java enums -/// \par parameters: symbol table, goto_functions and options -/// \return side effect is adding loops to unwindset -void remove_static_init_loops( - const goto_modelt &goto_model, - optionst &options, - message_handlert &msg) -{ - remove_static_init_loops( - goto_model.symbol_table, - goto_model.goto_functions, - options, - msg); -} - -void remove_static_init_loops( - const symbol_tablet &symbol_table, - const goto_functionst &goto_functions, - optionst &options, - message_handlert &msg) -{ - remove_static_init_loopst remove_loops(symbol_table); - remove_loops.unwind_enum_static(goto_functions, options, msg); -} diff --git a/src/goto-programs/remove_static_init_loops.h b/src/goto-programs/remove_static_init_loops.h deleted file mode 100644 index a78001046aa..00000000000 --- a/src/goto-programs/remove_static_init_loops.h +++ /dev/null @@ -1,36 +0,0 @@ -/*******************************************************************\ - -Module: Unwind loops in static initializers - -Author: Daniel Kroening, kroening@kroening.com - -\*******************************************************************/ - -/// \file -/// Unwind loops in static initializers - -#ifndef CPROVER_GOTO_PROGRAMS_REMOVE_STATIC_INIT_LOOPS_H -#define CPROVER_GOTO_PROGRAMS_REMOVE_STATIC_INIT_LOOPS_H - -#include - -#include -#include -#include - -class goto_modelt; -class symbol_tablet; -class goto_functionst; - -void remove_static_init_loops( - const goto_modelt &, - optionst &, - message_handlert &); - -void remove_static_init_loops( - const symbol_tablet &symbol_table, - const goto_functionst &goto_functions, - optionst &, - message_handlert &); - -#endif // CPROVER_GOTO_PROGRAMS_REMOVE_STATIC_INIT_LOOPS_H diff --git a/src/java_bytecode/Makefile b/src/java_bytecode/Makefile index bcf7fd1e5b2..a868fdd5c90 100644 --- a/src/java_bytecode/Makefile +++ b/src/java_bytecode/Makefile @@ -18,6 +18,7 @@ SRC = bytecode_info.cpp \ java_bytecode_typecheck_type.cpp \ java_class_loader.cpp \ java_class_loader_limit.cpp \ + java_enum_static_init_unwind_handler.cpp \ java_entry_point.cpp \ java_local_variable_table.cpp \ java_object_factory.cpp \ diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 8942853075a..12b87375e77 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -206,6 +206,12 @@ void java_bytecode_convert_classt::convert(const classt &c) class_type.set(ID_abstract, c.is_abstract); if(c.is_enum) { + if(max_array_length != 0 && c.enum_elements > max_array_length) + { + warning() << "Java Enum " << c.name << " won't work properly because max " + << "array length (" << max_array_length << ") is less than the " + << "enum size (" << c.enum_elements << ")" << eom; + } class_type.set( ID_java_enum_static_unwind, std::to_string(c.enum_elements+1)); diff --git a/src/java_bytecode/java_enum_static_init_unwind_handler.cpp b/src/java_bytecode/java_enum_static_init_unwind_handler.cpp new file mode 100644 index 00000000000..2c196ff5a42 --- /dev/null +++ b/src/java_bytecode/java_enum_static_init_unwind_handler.cpp @@ -0,0 +1,57 @@ +/*******************************************************************\ + +Module: Unwind loops in static initializers + +Author: Chris Smowton, chris.smowton@diffblue.com + +\*******************************************************************/ + +/// \file +/// Unwind loops in static initializers + +#include "java_enum_static_init_unwind_handler.h" + +#include + +/// Unwind handler that special-cases the clinit (static initializer) functions +/// of enumeration classes. When java_bytecode_convert_classt has annotated them +/// with a size of the enumeration type, this forces unwinding of any loop in +/// the static initializer to at least that many iterations, with intent to +/// permit population of the enumeration's value array. +/// \param function_id: function the loop is in +/// \param loop_number: ordinal number of the loop (ignored) +/// \param unwind_count: iteration count that is about to begin +/// \param [out] unwind_max: may be set to an advisory (unenforced) maximum when +/// we know the total iteration count +/// \param symbol_table: global symbol table +/// \return false if loop_id belongs to an enumeration's static initializer and +/// unwind_count is <= the enumeration size, or unknown (defer / no decision) +/// otherwise. +tvt java_enum_static_init_unwind_handler( + const irep_idt &function_id, + unsigned loop_number, + unsigned unwind_count, + unsigned &unwind_max, + const symbol_tablet &symbol_table) +{ + const std::string &function_str = id2string(function_id); + if(!has_suffix(function_str, ".:()V")) + return tvt::unknown(); + + const symbolt &function_symbol = symbol_table.lookup_ref(function_str); + irep_idt class_id = function_symbol.type.get(ID_C_class); + INVARIANT( + !class_id.empty(), "functions should have their defining class annotated"); + + const typet &class_type = symbol_table.lookup_ref(class_id).type; + size_t unwinds = class_type.get_size_t(ID_java_enum_static_unwind); + if(unwinds != 0 && unwind_count < unwinds) + { + unwind_max = unwinds; + return tvt(false); // Must unwind + } + else + { + return tvt::unknown(); // Defer to other unwind handlers + } +} diff --git a/src/java_bytecode/java_enum_static_init_unwind_handler.h b/src/java_bytecode/java_enum_static_init_unwind_handler.h new file mode 100644 index 00000000000..489a638526f --- /dev/null +++ b/src/java_bytecode/java_enum_static_init_unwind_handler.h @@ -0,0 +1,25 @@ +/*******************************************************************\ + +Module: Unwind loops in static initializers + +Author: Chris Smowton, chris.smowton@diffblue.com + +\*******************************************************************/ + +/// \file +/// Unwind loops in static initializers + +#ifndef CPROVER_JAVA_BYTECODE_JAVA_ENUM_STATIC_INIT_UNWIND_HANDLER_H +#define CPROVER_JAVA_BYTECODE_JAVA_ENUM_STATIC_INIT_UNWIND_HANDLER_H + +#include +#include + +tvt java_enum_static_init_unwind_handler( + const irep_idt &function_id, + unsigned loop_number, + unsigned unwind_count, + unsigned &unwind_max, + const symbol_tablet &symbol_table); + +#endif // CPROVER_JAVA_BYTECODE_JAVA_ENUM_STATIC_INIT_UNWIND_HANDLER_H diff --git a/src/jbmc/jbmc_parse_options.cpp b/src/jbmc/jbmc_parse_options.cpp index 2e5270a63fc..b0889adb4cb 100644 --- a/src/jbmc/jbmc_parse_options.cpp +++ b/src/jbmc/jbmc_parse_options.cpp @@ -39,7 +39,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include -#include #include #include #include @@ -57,6 +56,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include @@ -491,11 +491,6 @@ int jbmc_parse_optionst::doit() if(set_properties(goto_model)) return 7; // should contemplate EX_USAGE from sysexits.h - // unwinds loops to number of enum elements - // side effect: add this as explicit unwind to unwind set - if(options.get_bool_option("java-unwind-enum-static")) - remove_static_init_loops(goto_model, options, get_message_handler()); - // get solver cbmc_solverst jbmc_solvers( options, @@ -525,6 +520,24 @@ int jbmc_parse_optionst::doit() get_message_handler(), prop_conv); + // unwinds loops to number of enum elements + if(options.get_bool_option("java-unwind-enum-static")) + { + bmc.add_loop_unwind_handler( + [&goto_model] + (const irep_idt &function_id, + unsigned loop_number, + unsigned unwind, + unsigned &max_unwind) { // NOLINT (*) + return java_enum_static_init_unwind_handler( + function_id, + loop_number, + unwind, + max_unwind, + goto_model.symbol_table); + }); + } + // do actual BMC return do_bmc(bmc, goto_model); }