diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index 3cbbac3a55b..cce5aaad42b 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -561,7 +561,7 @@ int goto_instrument_parse_optionst::doit() if(cmdline.isset("interpreter")) { status() << "Starting interpreter" << eom; - interpreter(symbol_table, goto_functions); + interpreter(symbol_table, goto_functions, get_message_handler()); return 0; } diff --git a/src/goto-programs/interpreter.cpp b/src/goto-programs/interpreter.cpp index 2b4841a7c22..8d1565db914 100644 --- a/src/goto-programs/interpreter.cpp +++ b/src/goto-programs/interpreter.cpp @@ -9,13 +9,23 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include +#include #include #include +#include +#include +#include +#include +#include #include "interpreter.h" #include "interpreter_class.h" +#include "remove_returns.h" + +const size_t interpretert::npos=std::numeric_limits::max(); /*******************************************************************\ @@ -30,9 +40,46 @@ Function: interpretert::operator() \*******************************************************************/ void interpretert::operator()() +{ + status() << "0- Initialize:" << eom; + initialize(true); + try + { + status() << "Type h for help\n" << eom; + + while(!done) + command(); + + status() << total_steps << "- Program End.\n" << eom; + } + catch (const char *e) + { + error() << e << "\n" << eom; + } + + while(!done) + command(); +} + +/*******************************************************************\ + +Function: interpretert::initialize + + Inputs: + + Outputs: + + Purpose: Initializes the memory map of the interpreter and + [optionally] runs up to the entry point (thus doing + the cprover initialization) + +\*******************************************************************/ + +void interpretert::initialize(bool init) { build_memory_map(); + total_steps=0; const goto_functionst::function_mapt::const_iterator main_it=goto_functions.function_map.find(goto_functionst::entry_point()); @@ -44,17 +91,27 @@ void interpretert::operator()() if(!goto_function.body_available()) throw "main has no body"; - PC=goto_function.body.instructions.begin(); + pc=goto_function.body.instructions.begin(); function=main_it; done=false; - - while(!done) + if(init) { + stack_depth=call_stack.size()+1; show_state(); - command(); - if(!done) + step(); + while(!done && (stack_depth<=call_stack.size()) && (stack_depth!=npos)) + { + show_state(); + step(); + } + while(!done && (call_stack.size()==0)) + { + show_state(); step(); + } + clear_input_flags(); + input_vars.clear(); } } @@ -66,26 +123,31 @@ Function: interpretert::show_state Outputs: - Purpose: + Purpose: displays the current position of the pc and corresponding + code \*******************************************************************/ void interpretert::show_state() { - std::cout << std::endl; - std::cout << "----------------------------------------------------" - << std::endl; + if(!show) + return; + status() << "\n" + << total_steps+1 + << " ----------------------------------------------------\n"; - if(PC==function->second.body.instructions.end()) + if(pc==function->second.body.instructions.end()) { - std::cout << "End of function `" - << function->first << "'" << std::endl; + status() << "End of function `" << function->first << "'\n"; } else function->second.body.output_instruction( - ns, function->first, std::cout, PC); + ns, + function->first, + status(), + pc); - std::cout << std::endl; + status() << eom; } /*******************************************************************\ @@ -96,7 +158,7 @@ Function: interpretert::command Outputs: - Purpose: + Purpose: reads a user command and executes it. \*******************************************************************/ @@ -111,9 +173,97 @@ void interpretert::command() } char ch=tolower(command[0]); - if(ch=='q') done=true; + else if(ch=='h') + { + status() + << "Interpreter help\n" + << "h: display this menu\n" + << "j: output json trace\n" + << "m: output memory dump\n" + << "o: output goto trace\n" + << "q: quit\n" + << "r: run until completion\n" + << "s#: step a number of instructions\n" + << "sa: step across a function\n" + << "so: step out of a function\n" + << eom; + } + else if(ch=='j') + { + jsont json_steps; + convert(ns, steps, json_steps); + ch=tolower(command[1]); + if(ch==' ') + { + std::ofstream file; + file.open(command+2); + if(file.is_open()) + { + json_steps.output(file); + file.close(); + return; + } + } + json_steps.output(result()); + } + else if(ch=='m') + { + ch=tolower(command[1]); + print_memory(ch=='i'); + } + else if(ch=='o') + { + ch=tolower(command[1]); + if(ch==' ') + { + std::ofstream file; + file.open(command+2); + if(file.is_open()) + { + steps.output(ns, file); + file.close(); + return; + } + } + steps.output(ns, result()); + } + else if(ch=='r') + { + ch=tolower(command[1]); + initialize(ch!='0'); + } + else if((ch=='s') || (ch==0)) + { + num_steps=1; + stack_depth=npos; + ch=tolower(command[1]); + if(ch=='e') + num_steps=npos; + else if(ch=='o') + stack_depth=call_stack.size(); + else if(ch=='a') + stack_depth=call_stack.size()+1; + else + { + num_steps=atoi(command+1); + if(num_steps==0) + num_steps=1; + } + while(!done && ((num_steps==npos) || ((num_steps--)>0))) + { + step(); + show_state(); + } + while(!done && (stack_depth<=call_stack.size()) && (stack_depth!=npos)) + { + step(); + show_state(); + } + return; + } + show_state(); } /*******************************************************************\ @@ -124,41 +274,50 @@ Function: interpretert::step Outputs: - Purpose: + Purpose: executes a single step and updates the program counter \*******************************************************************/ void interpretert::step() { - if(PC==function->second.body.instructions.end()) + total_steps++; + if(pc==function->second.body.instructions.end()) { if(call_stack.empty()) done=true; else { - PC=call_stack.top().return_PC; + pc=call_stack.top().return_pc; function=call_stack.top().return_function; - stack_pointer=call_stack.top().old_stack_pointer; + // TODO: this increases memory size quite quickly. + // Should check alternatives call_stack.pop(); } return; } - next_PC=PC; - next_PC++; + next_pc=pc; + next_pc++; - switch(PC->type) + steps.add_step(goto_trace_stept()); + goto_trace_stept &trace_step=steps.get_last_step(); + trace_step.thread_nr=thread_id; + trace_step.pc=pc; + switch(pc->type) { case GOTO: + trace_step.type=goto_trace_stept::GOTO; execute_goto(); break; case ASSUME: + trace_step.type=goto_trace_stept::ASSUME; execute_assume(); break; case ASSERT: + trace_step.type=goto_trace_stept::ASSERT; execute_assert(); break; @@ -167,38 +326,46 @@ void interpretert::step() break; case DECL: + trace_step.type=goto_trace_stept::DECL; execute_decl(); break; case SKIP: case LOCATION: + trace_step.type=goto_trace_stept::LOCATION; + break; case END_FUNCTION: + trace_step.type=goto_trace_stept::FUNCTION_RETURN; break; case RETURN: + trace_step.type=goto_trace_stept::FUNCTION_RETURN; if(call_stack.empty()) throw "RETURN without call"; // NOLINT(readability/throw) - if(PC->code.operands().size()==1 && + if(pc->code.operands().size()==1 && call_stack.top().return_value_address!=0) { - std::vector rhs; - evaluate(PC->code.op0(), rhs); + mp_vectort rhs; + evaluate(pc->code.op0(), rhs); assign(call_stack.top().return_value_address, rhs); } - next_PC=function->second.body.instructions.end(); + next_pc=function->second.body.instructions.end(); break; case ASSIGN: + trace_step.type=goto_trace_stept::ASSIGNMENT; execute_assign(); break; case FUNCTION_CALL: + trace_step.type=goto_trace_stept::FUNCTION_CALL; execute_function_call(); break; case START_THREAD: + trace_step.type=goto_trace_stept::SPAWN; throw "START_THREAD not yet implemented"; // NOLINT(readability/throw) case END_THREAD: @@ -206,19 +373,44 @@ void interpretert::step() break; case ATOMIC_BEGIN: + trace_step.type=goto_trace_stept::ATOMIC_BEGIN; throw "ATOMIC_BEGIN not yet implemented"; // NOLINT(readability/throw) case ATOMIC_END: + trace_step.type=goto_trace_stept::ATOMIC_END; throw "ATOMIC_END not yet implemented"; // NOLINT(readability/throw) case DEAD: - throw "DEAD not yet implemented"; // NOLINT(readability/throw) - + trace_step.type=goto_trace_stept::DEAD; + break; + case THROW: + trace_step.type=goto_trace_stept::GOTO; + while(!done && (pc->type!=CATCH)) + { + if(pc==function->second.body.instructions.end()) + { + if(call_stack.empty()) + done=true; + else + { + pc=call_stack.top().return_pc; + function=call_stack.top().return_function; + call_stack.pop(); + } + } + else + { + next_pc=pc; + next_pc++; + } + } + break; + case CATCH: + break; default: throw "encountered instruction with undefined instruction type"; } - - PC=next_PC; + pc=next_pc; } /*******************************************************************\ @@ -229,21 +421,21 @@ Function: interpretert::execute_goto Outputs: - Purpose: + Purpose: executes a goto instruction \*******************************************************************/ void interpretert::execute_goto() { - if(evaluate_boolean(PC->guard)) + if(evaluate_boolean(pc->guard)) { - if(PC->targets.empty()) + if(pc->targets.empty()) throw "taken goto without target"; - if(PC->targets.size()>=2) + if(pc->targets.size()>=2) throw "non-deterministic goto encountered"; - next_PC=PC->targets.front(); + next_pc=pc->targets.front(); } } @@ -255,19 +447,37 @@ Function: interpretert::execute_other Outputs: - Purpose: + Purpose: executes side effects of 'other' instructions \*******************************************************************/ void interpretert::execute_other() { - const irep_idt &statement=PC->code.get_statement(); - + const irep_idt &statement=pc->code.get_statement(); if(statement==ID_expression) { - assert(PC->code.operands().size()==1); - std::vector rhs; - evaluate(PC->code.op0(), rhs); + assert(pc->code.operands().size()==1); + mp_vectort rhs; + evaluate(pc->code.op0(), rhs); + } + else if(statement==ID_array_set) + { + mp_vectort tmp, rhs; + evaluate(pc->code.op1(), tmp); + mp_integer address=evaluate_address(pc->code.op0()); + size_t size=get_size(pc->code.op0().type()); + while(rhs.size()code.get_statement()==ID_decl); + assert(pc->code.get_statement()==ID_decl); +} + +/*******************************************************************\ + +Function: interpretert::get_component_id + + Inputs: an object and a memory offset + + Outputs: + + Purpose: retrieves the member at offset + +\*******************************************************************/ + +irep_idt interpretert::get_component_id( + const irep_idt &object, + unsigned offset) +{ + const symbolt &symbol=ns.lookup(object); + const typet real_type=ns.follow(symbol.type); + if(real_type.id()!=ID_struct) + throw "request for member of non-struct"; + + const struct_typet &struct_type=to_struct_type(real_type); + const struct_typet::componentst &components=struct_type.components(); + for(struct_typet::componentst::const_iterator it=components.begin(); + it!=components.end(); it++) + { + if(offset<=0) + return it->id(); + size_t size=get_size(it->type()); + offset-=size; + } + return object; +} + +/*******************************************************************\ + +Function: interpretert::get_type + + Inputs: + + Outputs: + + Purpose: returns the type object corresponding to id + +\*******************************************************************/ + +typet interpretert::get_type(const irep_idt &id) const +{ + dynamic_typest::const_iterator it=dynamic_types.find(id); + if(it==dynamic_types.end()) + return symbol_table.lookup(id).type; + return it->second; +} + +/*******************************************************************\ + +Function: interpretert::get_value + + Inputs: + + Outputs: + + Purpose: retrives the constant value at memory location offset + as an object of type type + +\*******************************************************************/ + +exprt interpretert::get_value( + const typet &type, + std::size_t offset, + bool use_non_det) +{ + const typet real_type=ns.follow(type); + if(real_type.id()==ID_struct) + { + exprt result=struct_exprt(real_type); + const struct_typet &struct_type=to_struct_type(real_type); + const struct_typet::componentst &components=struct_type.components(); + + // Retrieve the values for the individual members + result.reserve_operands(components.size()); + for(struct_typet::componentst::const_iterator it=components.begin(); + it!=components.end(); it++) + { + size_t size=get_size(it->type()); + const exprt operand=get_value(it->type(), offset); + offset+=size; + result.copy_to_operands(operand); + } + return result; + } + else if(real_type.id()==ID_array) + { + // Get size of array + exprt result=array_exprt(to_array_type(real_type)); + const exprt &size_expr=static_cast(type.find(ID_size)); + size_t subtype_size=get_size(type.subtype()); + mp_integer mp_count; + to_integer(size_expr, mp_count); + unsigned count=integer2unsigned(mp_count); + + // Retrieve the value for each member in the array + result.reserve_operands(count); + for(unsigned i=0; i=0)) + return side_effect_expr_nondett(type); + mp_vectort rhs; + rhs.push_back(memory[offset].value); + return get_value(type, rhs); +} + +/*******************************************************************\ + + Function: interpretert::get_value + + Inputs: + + Outputs: + + Purpose: returns the value at offset in the form of type given a + memory buffer rhs which is typically a structured type + +\*******************************************************************/ + +exprt interpretert::get_value( + const typet &type, + mp_vectort &rhs, + std::size_t offset) +{ + const typet real_type=ns.follow(type); + assert(!rhs.empty()); + + if(real_type.id()==ID_struct) + { + exprt result=struct_exprt(real_type); + const struct_typet &struct_type=to_struct_type(real_type); + const struct_typet::componentst &components=struct_type.components(); + + // Retrieve the values for the individual members + result.reserve_operands(components.size()); + for(const struct_union_typet::componentt &expr : components) + { + size_t size=get_size(expr.type()); + const exprt operand=get_value(expr.type(), rhs, offset); + offset+=size; + result.copy_to_operands(operand); + } + return result; + } + else if(real_type.id()==ID_array) + { + exprt result(ID_constant, type); + const exprt &size_expr=static_cast(type.find(ID_size)); + + // Get size of array + size_t subtype_size=get_size(type.subtype()); + mp_integer mp_count; + to_integer(size_expr, mp_count); + unsigned count=integer2unsigned(mp_count); + + // Retrieve the value for each member in the array + result.reserve_operands(count); + for(unsigned i=0; i object count " << memory.size() << eom; + throw "interpreter: reading from invalid pointer"; + } + else if(real_type.id()==ID_string) + { + // Strings are currently encoded by their irep_idt ID. + return constant_exprt( + irep_idt(rhs[offset].to_long(), 0), + type); + } + // Retrieve value of basic data type + return from_integer(rhs[offset], type); } /*******************************************************************\ @@ -298,29 +754,56 @@ Function: interpretert::execute_assign Outputs: - Purpose: + Purpose: executes the assign statement at the current pc value \*******************************************************************/ void interpretert::execute_assign() { const code_assignt &code_assign= - to_code_assign(PC->code); + to_code_assign(pc->code); - std::vector rhs; + mp_vectort rhs; evaluate(code_assign.rhs(), rhs); if(!rhs.empty()) { mp_integer address=evaluate_address(code_assign.lhs()); - unsigned size=get_size(code_assign.lhs().type()); + size_t size=get_size(code_assign.lhs().type()); if(size!=rhs.size()) - std::cout << "!! failed to obtain rhs (" - << rhs.size() << " vs. " - << size << ")" << std::endl; + error() << "!! failed to obtain rhs (" + << rhs.size() << " vs. " + << size << ")\n" + << eom; else + { + goto_trace_stept &trace_step=steps.get_last_step(); assign(address, rhs); + trace_step.full_lhs=code_assign.lhs(); + + // TODO: need to look at other cases on ssa_exprt + // (dereference should be handled on ssa) + if(ssa_exprt::can_build_identifier(trace_step.full_lhs)) + { + trace_step.lhs_object=ssa_exprt(trace_step.full_lhs); + } + trace_step.full_lhs_value=get_value(trace_step.full_lhs.type(), rhs); + trace_step.lhs_object_value=trace_step.full_lhs_value; + } + } + else if(code_assign.rhs().id()==ID_side_effect) + { + side_effect_exprt side_effect=to_side_effect_expr(code_assign.rhs()); + if(side_effect.get_statement()==ID_nondet) + { + unsigned address=integer2unsigned(evaluate_address(code_assign.lhs())); + size_t size=get_size(code_assign.lhs().type()); + for(size_t i=0; i &rhs) + const mp_vectort &rhs) { - for(unsigned i=0; iguard)) + if(!evaluate_boolean(pc->guard)) throw "assumption failed"; } @@ -384,8 +875,14 @@ Function: interpretert::execute_assert void interpretert::execute_assert() { - if(!evaluate_boolean(PC->guard)) - throw "assertion failed"; + if(!evaluate_boolean(pc->guard)) + { + if((target_assert==pc) || stop_on_assertion) + throw "program assertion reached"; + else if(show) + error() << "assertion failed at " << pc->location_number + << "\n" << eom; + } } /*******************************************************************\ @@ -403,7 +900,7 @@ Function: interpretert::execute_function_call void interpretert::execute_function_call() { const code_function_callt &function_call= - to_code_function_call(PC->code); + to_code_function_call(pc->code); // function to be called mp_integer a=evaluate_address(function_call.function()); @@ -413,8 +910,12 @@ void interpretert::execute_function_call() else if(a>=memory.size()) throw "out-of-range function call"; + // Retrieve the empty last trace step struct we pushed for this step + // of the interpreter run to fill it with the corresponding data + goto_trace_stept &trace_step=steps.get_last_step(); const memory_cellt &cell=memory[integer2size_t(a)]; const irep_idt &identifier=cell.identifier; + trace_step.identifier=identifier; const goto_functionst::function_mapt::const_iterator f_it= goto_functions.function_map.find(identifier); @@ -432,7 +933,7 @@ void interpretert::execute_function_call() return_value_address=0; // values of the arguments - std::vector > argument_values; + std::vector argument_values; argument_values.resize(function_call.arguments().size()); @@ -446,7 +947,7 @@ void interpretert::execute_function_call() call_stack.push(stack_framet()); stack_framet &frame=call_stack.top(); - frame.return_PC=next_PC; + frame.return_pc=next_pc; frame.return_function=function; frame.old_stack_pointer=stack_pointer; frame.return_value_address=return_value_address; @@ -458,24 +959,7 @@ void interpretert::execute_function_call() for(const auto &id : locals) { const symbolt &symbol=ns.lookup(id); - unsigned size=get_size(symbol.type); - - if(size!=0) - { - frame.local_map[id]=stack_pointer; - - for(unsigned i=0; i=memory.size()) - memory.resize(address+1); - memory[address].value=0; - memory[address].identifier=id; - memory[address].offset=i; - } - - stack_pointer+=size; - } + frame.local_map[id]=integer2unsigned(build_memory_map(id, symbol.type)); } // assign the arguments @@ -485,7 +969,7 @@ void interpretert::execute_function_call() if(argument_values.size()second.body.instructions.begin(); + next_pc=f_it->second.body.instructions.begin(); } else - throw "no body for "+id2string(identifier); + { + list_input_varst::iterator it= + function_input_vars.find(function_call.function().get(ID_identifier)); + if(it!=function_input_vars.end()) + { + mp_vectort value; + assert(!it->second.empty()); + assert(!it->second.front().return_assignments.empty()); + evaluate(it->second.front().return_assignments.back().value, value); + if(return_value_address>0) + { + assign(return_value_address, value); + } + it->second.pop_front(); + return; + } + if(show) + error() << "no body for "+id2string(identifier) << eom; + } } /*******************************************************************\ @@ -510,7 +1012,7 @@ Function: interpretert::build_memory_map Outputs: - Purpose: + Purpose: Creates a memory map of all static symbols in the program \*******************************************************************/ @@ -520,6 +1022,10 @@ void interpretert::build_memory_map() memory.resize(1); memory[0].offset=0; memory[0].identifier="NULL-OBJECT"; + memory[0].initialized=0; + + num_dynamic_objects=0; + dynamic_types.clear(); // now do regular static symbols for(const auto &s : symbol_table.symbols) @@ -543,7 +1049,7 @@ Function: interpretert::build_memory_map void interpretert::build_memory_map(const symbolt &symbol) { - unsigned size=0; + size_t size=0; if(symbol.type.id()==ID_code) { @@ -560,29 +1066,120 @@ void interpretert::build_memory_map(const symbolt &symbol) memory.resize(address+size); memory_map[symbol.name]=address; - for(unsigned i=0; i(type.find(ID_size)); + mp_vectort computed_size; + evaluate(size_expr, computed_size); + if(computed_size.size()==1 && + computed_size[0]>=0) + { + result() << "Concretized array with size " << computed_size[0] + << eom; + return array_typet(type.subtype(), + constant_exprt::integer_constant(computed_size[0].to_ulong())); + } + else + { + error() << "Failed to concretize variable array" + << eom; + } + } + return type; +} + +/*******************************************************************\ + +Function: interpretert::build_memory_map + + Inputs: + + Outputs: Updtaes the memory map to include variable id if it does + not exist + + Purpose: Populates dynamic entries of the memory map + +\*******************************************************************/ + +mp_integer interpretert::build_memory_map( + const irep_idt &id, + const typet &type) +{ + typet alloc_type=concretize_type(type); + size_t size=get_size(alloc_type); + auto it=dynamic_types.find(id); + + if(it!=dynamic_types.end()) + { + unsigned offset=1; + unsigned address=memory_map[id]; + while(memory[address+offset].offset>0) offset++; + + // current size <= size already recorded + if(size<=offset) + return memory_map[id]; + } + + // The current size is bigger then the one previously recorded + // in the memory map + + if(size==0) + size=1; // This is a hack to create existence + + unsigned address=memory.size(); + memory.resize(address+size); + memory_map[id]=address; + dynamic_types.insert(std::pair(id, alloc_type)); + + for(size_t i=0; i(type.find(ID_size)); - unsigned subtype_size=get_size(type.subtype()); + size_t subtype_size=get_size(type.subtype()); - mp_integer i; - if(!to_integer(size_expr, i)) - return subtype_size*integer2unsigned(i); - else - return subtype_size; + mp_vectort i; + evaluate(size_expr, i); + if(i.size()==1) + { + // Go via the binary representation to reproduce any + // overflow behaviour. + exprt size_const=from_integer(i[0], size_expr.type()); + mp_integer size_mp; + assert(!to_integer(size_const, size_mp)); + return subtype_size*integer2unsigned(size_mp); + } + return subtype_size; } else if(type.id()==ID_symbol) { return get_size(ns.follow(type)); } + return 1; +} + +/******************************************************************* \ + +Function: interpretert::get_value + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +exprt interpretert::get_value(const irep_idt &id) +{ + // The dynamic type and the static symbol type may differ for VLAs, + // where the symbol carries a size expression and the dynamic type + // registry contains its actual length. + auto findit=dynamic_types.find(id); + typet get_type; + if(findit!=dynamic_types.end()) + get_type=findit->second; else - return 1; + get_type=symbol_table.lookup(id).type; + + symbol_exprt symbol_expr(id, get_type); + mp_integer whole_lhs_object_address=evaluate_address(symbol_expr); + + return get_value(get_type, integer2unsigned(whole_lhs_object_address)); } /*******************************************************************\ @@ -652,8 +1285,39 @@ Function: interpreter void interpreter( const symbol_tablet &symbol_table, - const goto_functionst &goto_functions) + const goto_functionst &goto_functions, + message_handlert &message_handler) { - interpretert interpreter(symbol_table, goto_functions); + interpretert interpreter( + symbol_table, + goto_functions, + message_handler); interpreter(); } + +/*******************************************************************\ + +Function: interpretert::print_memory + + Inputs: + + Outputs: + + Purpose: Prints the current state of the memory map + Since messaget mdofifies class members, print functions are nonconst + +\*******************************************************************/ + +void interpretert::print_memory(bool input_flags) +{ + for(size_t i=0; i(cell.initialized) << ")" + << eom; + debug() << eom; + } +} diff --git a/src/goto-programs/interpreter.h b/src/goto-programs/interpreter.h index bf909502793..1888a009969 100644 --- a/src/goto-programs/interpreter.h +++ b/src/goto-programs/interpreter.h @@ -9,10 +9,13 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_GOTO_PROGRAMS_INTERPRETER_H #define CPROVER_GOTO_PROGRAMS_INTERPRETER_H +#include + #include "goto_functions.h" void interpreter( const symbol_tablet &symbol_table, - const goto_functionst &goto_functions); + const goto_functionst &goto_functions, + message_handlert &message_handler); #endif // CPROVER_GOTO_PROGRAMS_INTERPRETER_H diff --git a/src/goto-programs/interpreter_class.h b/src/goto-programs/interpreter_class.h index 14c429a1a86..83836eea072 100644 --- a/src/goto-programs/interpreter_class.h +++ b/src/goto-programs/interpreter_class.h @@ -14,24 +14,93 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "goto_functions.h" +#include "goto_trace.h" +#include "json_goto_trace.h" +#include "util/message.h" -class interpretert +class interpretert:public messaget { public: interpretert( const symbol_tablet &_symbol_table, - const goto_functionst &_goto_functions): + const goto_functionst &_goto_functions, + message_handlert &_message_handler): + messaget(_message_handler), symbol_table(_symbol_table), ns(_symbol_table), - goto_functions(_goto_functions) + goto_functions(_goto_functions), + stop_on_assertion(false) { + show=true; } void operator()(); + void print_memory(bool input_flags); + + // An assertion that identifier 'id' carries value in some particular context. + // Used to record parameter (id) assignment (value) lists for function calls. + struct function_assignmentt + { + irep_idt id; + exprt value; + }; + + // A list of such assignments. + typedef std::vector function_assignmentst; + + typedef std::vector mp_vectort; + + // Maps an assignment id to the name of the parameter being assigned + typedef std::pair assignment_idt; + + // Identifies an expression before and after some change; + typedef std::pair diff_pairt; + + // Records a diff between an assignment pre and post conditions + typedef std::map side_effects_differencet; + + // A entry in the input_valuest map + typedef std::pair input_entryt; + + // List of the input variables with their corresponding initial values + typedef std::map input_valuest; + + // List of dynamically allocated symbols that are not in the symbol table + typedef std::map dynamic_typest; + + // An assignment list annotated with the calling context. + struct function_assignments_contextt + { + irep_idt calling_function; + function_assignmentst return_assignments; + function_assignmentst param_assignments; + }; + + // list_input_varst maps function identifiers onto a vector of [name = value] + // assignments per call to that function. + typedef std::list + function_assignments_contextst; + typedef std::map > + list_input_varst; + + const dynamic_typest &get_dynamic_types() { return dynamic_types; } protected: + struct trace_stack_entryt + { + irep_idt func_name; + mp_integer this_address; + irep_idt capture_symbol; + bool is_super_call; + std::vector param_values; + }; + typedef std::vector trace_stackt; + const symbol_tablet &symbol_table; + + // This is a cache so that we don't have to create it when a call needs it const namespacet ns; + const goto_functionst &goto_functions; typedef std::unordered_map memory_mapt; @@ -43,16 +112,40 @@ class interpretert irep_idt identifier; unsigned offset; mp_integer value; + // Initialized is annotated even during reads + // Set to 1 when written-before-read, -1 when read-before-written + mutable char initialized; }; typedef std::vector memoryt; - memoryt memory; + typedef std::map parameter_sett; + // mapping -> value + typedef std::pair struct_member_idt; + typedef std::map struct_valuest; + + + // The memory is being annotated/reshaped even during reads + // (ie to find a read-before-write location) thus memory + // properties need to be mutable to avoid making all calls nonconst + mutable memoryt memory; std::size_t stack_pointer; void build_memory_map(); void build_memory_map(const symbolt &symbol); - unsigned get_size(const typet &type) const; + mp_integer build_memory_map(const irep_idt &id, const typet &type); + typet concretize_type(const typet &type); + size_t get_size(const typet &type); + + irep_idt get_component_id(const irep_idt &object, unsigned offset); + typet get_type(const irep_idt &id) const; + exprt get_value( + const typet &type, + std::size_t offset=0, + bool use_non_det=false); + exprt get_value(const typet &type, mp_vectort &rhs, std::size_t offset=0); + exprt get_value(const irep_idt &id); + void step(); void execute_assert(); @@ -62,21 +155,26 @@ class interpretert void execute_function_call(); void execute_other(); void execute_decl(); + void clear_input_flags(); + + void allocate( + mp_integer address, + size_t size); void assign( mp_integer address, - const std::vector &rhs); + const mp_vectort &rhs); void read( mp_integer address, - std::vector &dest) const; + mp_vectort &dest) const; - void command(); + virtual void command(); class stack_framet { public: - goto_programt::const_targett return_PC; + goto_programt::const_targett return_pc; goto_functionst::function_mapt::const_iterator return_function; mp_integer return_value_address; memory_mapt local_map; @@ -84,27 +182,56 @@ class interpretert }; typedef std::stack call_stackt; + call_stackt call_stack; + input_valuest input_vars; + list_input_varst function_input_vars; goto_functionst::function_mapt::const_iterator function; - goto_programt::const_targett PC, next_PC; + goto_programt::const_targett pc, next_pc, target_assert; + goto_tracet steps; bool done; - - bool evaluate_boolean(const exprt &expr) const + bool show; + bool stop_on_assertion; + static const size_t npos; + size_t num_steps; + size_t total_steps; + + dynamic_typest dynamic_types; + int num_dynamic_objects; + size_t stack_depth; + int thread_id; + + bool evaluate_boolean(const exprt &expr) { - std::vector v; + mp_vectort v; evaluate(expr, v); if(v.size()!=1) throw "invalid boolean value"; return v.front()!=0; } + bool count_type_leaves( + const typet &source_type, + mp_integer &result); + + bool byte_offset_to_memory_offset( + const typet &source_type, + mp_integer byte_offset, + mp_integer &result); + + bool memory_offset_to_byte_offset( + const typet &source_type, + mp_integer cell_offset, + mp_integer &result); + void evaluate( const exprt &expr, - std::vector &dest) const; + mp_vectort &dest); - mp_integer evaluate_address(const exprt &expr) const; + mp_integer evaluate_address(const exprt &expr, bool fail_quietly=false); + void initialize(bool init); void show_state(); }; diff --git a/src/goto-programs/interpreter_evaluate.cpp b/src/goto-programs/interpreter_evaluate.cpp index a9d62740cc3..712d4b9d807 100644 --- a/src/goto-programs/interpreter_evaluate.cpp +++ b/src/goto-programs/interpreter_evaluate.cpp @@ -8,42 +8,323 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include +#include +#include #include "interpreter_class.h" /*******************************************************************\ -Function: interpretert::evaluate +Function: interpretert::read Inputs: Outputs: - Purpose: + Purpose: reads a memory address and loads it into the dest variable + marks cell as read before written if cell has never been written \*******************************************************************/ void interpretert::read( mp_integer address, - std::vector &dest) const + mp_vectort &dest) const { // copy memory region - for(auto &d : dest) + for(size_t i=0; i0) + cell.initialized=0; + } +} + +/*******************************************************************\ + +Function: interpretert::count_type_leaves + + Inputs: Type + + Outputs: Number of leaf primitive types; returns true on error + + Purpose: + +\*******************************************************************/ + +bool interpretert::count_type_leaves(const typet &ty, mp_integer &result) +{ + if(ty.id()==ID_struct) + { + result=0; + mp_integer subtype_count; + for(const auto &component : to_struct_type(ty).components()) + { + if(count_type_leaves(component.type(), subtype_count)) + return true; + result+=subtype_count; + } + return false; + } + else if(ty.id()==ID_array) + { + const auto &at=to_array_type(ty); + mp_vectort array_size_vec; + evaluate(at.size(), array_size_vec); + if(array_size_vec.size()!=1) + return true; + mp_integer subtype_count; + if(count_type_leaves(at.subtype(), subtype_count)) + return true; + result=array_size_vec[0]*subtype_count; + return false; + } + else + { + result=1; + return false; + } +} + +/*******************************************************************\ + +Function: interpretert::byte_offset_to_memory_offset + + Inputs: 'source_type', 'offset' (unit: bytes), + + Outputs: Offset into a vector of interpreter values; returns true on error + + Purpose: Supposing the caller has an mp_vector representing + a value with type 'source_type', this yields the offset into that + vector at which to find a value at *byte* address 'offset'. + We need this because the interpreter's memory map uses unlabelled + variable-width values -- for example, a C value { { 1, 2 }, 3, 4 } + of type struct { int x[2]; char y; unsigned long z; } + would be represented [1,2,3,4], with the source type needed alongside + to figure out which member is targeted by a byte-extract operation. + +\*******************************************************************/ + +bool interpretert::byte_offset_to_memory_offset( + const typet &source_type, + mp_integer offset, + mp_integer &result) +{ + if(source_type.id()==ID_struct) + { + const auto &st=to_struct_type(source_type); + const struct_typet::componentst &components=st.components(); + member_offset_iterator component_offsets(st, ns); + mp_integer previous_member_offsets=0; + for(; component_offsets->firstsecond!=-1 && + component_offsets->second<=offset; + ++component_offsets) + { + const auto &component_type=components[component_offsets->first].type(); + mp_integer component_byte_size=pointer_offset_size(component_type, ns); + if(component_byte_size<0) + return true; + if((component_offsets->second+component_byte_size)>offset) + { + mp_integer subtype_result; + bool ret=byte_offset_to_memory_offset( + component_type, + offset-(component_offsets->second), + subtype_result); + result=previous_member_offsets+subtype_result; + return ret; + } + else + { + mp_integer component_count; + if(count_type_leaves(component_type, component_count)) + return true; + previous_member_offsets+=component_count; + } + } + // Ran out of struct members, or struct contained a variable-length field. + return true; + } + else if(source_type.id()==ID_array) + { + const auto &at=to_array_type(source_type); + mp_vectort array_size_vec; + evaluate(at.size(), array_size_vec); + if(array_size_vec.size()!=1) + return true; + mp_integer array_size=array_size_vec[0]; + mp_integer elem_size_bytes=pointer_offset_size(at.subtype(), ns); + if(elem_size_bytes<=0) + return true; + mp_integer elem_size_leaves; + if(count_type_leaves(at.subtype(), elem_size_leaves)) + return true; + mp_integer this_idx=offset/elem_size_bytes; + if(this_idx>=array_size_vec[0]) + return true; + mp_integer subtype_result; + bool ret=byte_offset_to_memory_offset( + at.subtype(), + offset%elem_size_bytes, + subtype_result); + result=subtype_result+(elem_size_leaves*this_idx); + return ret; + } + else + { + result=0; + // Can't currently subdivide a primitive. + return offset!=0; + } +} + +/*******************************************************************\ + +Function: interpretert::memory_offset_to_byte_offset - ++address; + Inputs: An interpreter memory offset and the type to interpret that memory + + Outputs: The corresponding byte offset. Returns true on error + + Purpose: Similarly to the above, the interpreter's memory objects contain + mp_integers that represent variable-sized struct members. This + counts the size of type leaves to determine the byte offset + corresponding to a memory offset. + +\*******************************************************************/ + +bool interpretert::memory_offset_to_byte_offset( + const typet &source_type, + mp_integer cell_offset, + mp_integer &result) +{ + if(source_type.id()==ID_struct) + { + const auto &st=to_struct_type(source_type); + const struct_typet::componentst &components=st.components(); + member_offset_iterator offsets(st, ns); + mp_integer previous_member_sizes; + for(; offsets->firstsecond!=-1; ++offsets) + { + const auto &component_type=components[offsets->first].type(); + mp_integer component_count; + if(count_type_leaves(component_type, component_count)) + return true; + if(component_count>cell_offset) + { + mp_integer subtype_result; + bool ret=memory_offset_to_byte_offset( + component_type, + cell_offset, + subtype_result); + result=previous_member_sizes+subtype_result; + return ret; + } + else + { + cell_offset-=component_count; + mp_integer component_size=pointer_offset_size(component_type, ns); + if(component_size<0) + return true; + previous_member_sizes+=component_size; + } + } + // Ran out of members, or member of indefinite size + return true; + } + else if(source_type.id()==ID_array) + { + const auto &at=to_array_type(source_type); + mp_vectort array_size_vec; + evaluate(at.size(), array_size_vec); + if(array_size_vec.size()!=1) + return true; + mp_integer elem_size=pointer_offset_size(at.subtype(), ns); + if(elem_size==-1) + return true; + mp_integer elem_count; + if(count_type_leaves(at.subtype(), elem_count)) + return true; + mp_integer this_idx=cell_offset/elem_count; + if(this_idx>=array_size_vec[0]) + return true; + mp_integer subtype_result; + bool ret= + memory_offset_to_byte_offset(at.subtype(), + cell_offset%elem_count, + subtype_result); + result=subtype_result+(elem_size*this_idx); + return ret; + } + else + { + // Primitive type. + result=0; + return cell_offset!=0; } } @@ -61,12 +342,55 @@ Function: interpretert::evaluate void interpretert::evaluate( const exprt &expr, - std::vector &dest) const + mp_vectort &dest) { if(expr.id()==ID_constant) { if(expr.type().id()==ID_struct) { + dest.reserve(get_size(expr.type())); + bool error=false; + + forall_operands(it, expr) + { + if(it->type().id()==ID_code) + continue; + + size_t sub_size=get_size(it->type()); + if(sub_size==0) + continue; + + mp_vectort tmp; + evaluate(*it, tmp); + + if(tmp.size()==sub_size) + { + for(size_t i=0; itype().id()==ID_code) continue; - unsigned sub_size=get_size(it->type()); + size_t sub_size=get_size(it->type()); if(sub_size==0) continue; - std::vector tmp; + mp_vectort tmp; evaluate(*it, tmp); if(tmp.size()==sub_size) { - for(unsigned i=0; i tmp0, tmp1; + mp_vectort tmp0, tmp1; evaluate(expr.op0(), tmp0); evaluate(expr.op1(), tmp1); @@ -172,7 +691,7 @@ void interpretert::evaluate( forall_operands(it, expr) { - std::vector tmp; + mp_vectort tmp; evaluate(*it, tmp); if(tmp.size()==1 && tmp.front()!=0) @@ -191,20 +710,20 @@ void interpretert::evaluate( if(expr.operands().size()!=3) throw "if expects three operands"; - std::vector tmp0, tmp1, tmp2; + mp_vectort tmp0, tmp1; evaluate(expr.op0(), tmp0); - evaluate(expr.op1(), tmp1); - evaluate(expr.op2(), tmp2); - if(tmp0.size()==1 && tmp1.size()==1 && tmp2.size()==1) + if(tmp0.size()==1) { - const mp_integer &op0=tmp0.front(); - const mp_integer &op1=tmp1.front(); - const mp_integer &op2=tmp2.front(); - - dest.push_back(op0!=0?op1:op2); + if(tmp0.front()!=0) + evaluate(expr.op1(), tmp1); + else + evaluate(expr.op2(), tmp1); } + if(tmp1.size()==1) + dest.push_back(tmp1.front()); + return; } else if(expr.id()==ID_and) @@ -216,7 +735,7 @@ void interpretert::evaluate( forall_operands(it, expr) { - std::vector tmp; + mp_vectort tmp; evaluate(*it, tmp); if(tmp.size()==1 && tmp.front()==0) @@ -235,7 +754,7 @@ void interpretert::evaluate( if(expr.operands().size()!=1) throw id2string(expr.id())+" expects one operand"; - std::vector tmp; + mp_vectort tmp; evaluate(expr.op0(), tmp); if(tmp.size()==1) @@ -249,7 +768,7 @@ void interpretert::evaluate( forall_operands(it, expr) { - std::vector tmp; + mp_vectort tmp; evaluate(*it, tmp); if(tmp.size()==1) result+=tmp.front(); @@ -281,7 +800,7 @@ void interpretert::evaluate( forall_operands(it, expr) { - std::vector tmp; + mp_vectort tmp; evaluate(*it, tmp); if(tmp.size()==1) { @@ -317,7 +836,7 @@ void interpretert::evaluate( if(expr.operands().size()!=2) throw "- expects two operands"; - std::vector tmp0, tmp1; + mp_vectort tmp0, tmp1; evaluate(expr.op0(), tmp0); evaluate(expr.op1(), tmp1); @@ -330,7 +849,7 @@ void interpretert::evaluate( if(expr.operands().size()!=2) throw "/ expects two operands"; - std::vector tmp0, tmp1; + mp_vectort tmp0, tmp1; evaluate(expr.op0(), tmp0); evaluate(expr.op1(), tmp1); @@ -343,7 +862,7 @@ void interpretert::evaluate( if(expr.operands().size()!=1) throw "unary- expects one operand"; - std::vector tmp0; + mp_vectort tmp0; evaluate(expr.op0(), tmp0); if(tmp0.size()==1) @@ -358,22 +877,130 @@ void interpretert::evaluate( dest.push_back(evaluate_address(expr.op0())); return; } + else if(expr.id()==ID_pointer_offset) + { + if(expr.operands().size()!=1) + throw "pointer_offset expects one operand"; + if(expr.op0().type().id()!=ID_pointer) + throw "pointer_offset expects a pointer operand"; + mp_vectort result; + evaluate(expr.op0(), result); + if(result.size()==1) + { + // Return the distance, in bytes, between the address returned + // and the beginning of the underlying object. + mp_integer address=result[0]; + if(address>0 && address0) + { + dest.insert(dest.end(), + extract_from.begin()+memory_offset.to_long(), + extract_from.begin()+(memory_offset+target_type_leaves).to_long()); + return; + } + } + } + } else if(expr.id()==ID_dereference || expr.id()==ID_index || expr.id()==ID_symbol || expr.id()==ID_member) { - mp_integer a=evaluate_address(expr); - dest.resize(get_size(expr.type())); - read(a, dest); - return; + mp_integer address=evaluate_address( + expr, + true); // fail quietly + if(address.is_zero() && expr.id()==ID_index) + { + // Try reading from a constant array: + mp_vectort idx; + evaluate(expr.op1(), idx); + if(idx.size()==1) + { + mp_integer read_from_index=idx[0]; + if(expr.op0().id()==ID_array) + { + const auto &ops=expr.op0().operands(); + assert(read_from_index.is_long()); + if(read_from_index>=0 && read_from_index tmp; + mp_vectort tmp; evaluate(expr.op0(), tmp); if(tmp.size()==1) @@ -399,31 +1026,38 @@ void interpretert::evaluate( dest.push_back(binary2integer(s, false)); return; } - else if(expr.type().id()==ID_bool) + else if((expr.type().id()==ID_bool) || (expr.type().id()==ID_c_bool)) { dest.push_back(value!=0); return; } } } - else if(expr.id()==ID_ashr) + else if((expr.id()==ID_array) || (expr.id()==ID_array_of)) { - if(expr.operands().size()!=2) - throw "ashr expects two operands"; - - std::vector tmp0, tmp1; - evaluate(expr.op0(), tmp0); - evaluate(expr.op1(), tmp1); - - if(tmp0.size()==1 && tmp1.size()==1) - dest.push_back(tmp0.front()/power(2, tmp1.front())); - + forall_operands(it, expr) + { + evaluate(*it, dest); + } return; } - - std::cout << "!! failed to evaluate expression: " - << from_expr(ns, function->first, expr) - << std::endl; + else if(expr.id()==ID_nil) + { + dest.push_back(0); + return; + } + else if(expr.id()==ID_infinity) + { + if(expr.type().id()==ID_signedbv) + { + error() << "Infinite size arrays not supported" << eom; + return; + } + } + error() << "!! failed to evaluate expression: " + << from_expr(ns, function->first, expr) << "\n" + << expr.id() << "[" << expr.type().id() << "]" + << eom; } /*******************************************************************\ @@ -438,11 +1072,16 @@ Function: interpretert::evaluate_address \*******************************************************************/ -mp_integer interpretert::evaluate_address(const exprt &expr) const +mp_integer interpretert::evaluate_address( + const exprt &expr, + bool fail_quietly) { if(expr.id()==ID_symbol) { - const irep_idt &identifier=expr.get(ID_identifier); + const irep_idt &identifier= + is_ssa_expr(expr) ? + to_ssa_expr(expr).get_original_name() : + expr.get(ID_identifier); interpretert::memory_mapt::const_iterator m_it1= memory_map.find(identifier); @@ -458,13 +1097,16 @@ mp_integer interpretert::evaluate_address(const exprt &expr) const if(m_it2!=call_stack.top().local_map.end()) return m_it2->second; } + mp_integer address=memory.size(); + build_memory_map(to_symbol_expr(expr).get_identifier(), expr.type()); + return address; } else if(expr.id()==ID_dereference) { if(expr.operands().size()!=1) throw "dereference expects one operand"; - std::vector tmp0; + mp_vectort tmp0; evaluate(expr.op0(), tmp0); if(tmp0.size()==1) @@ -475,11 +1117,15 @@ mp_integer interpretert::evaluate_address(const exprt &expr) const if(expr.operands().size()!=2) throw "index expects two operands"; - std::vector tmp1; + mp_vectort tmp1; evaluate(expr.op1(), tmp1); if(tmp1.size()==1) - return evaluate_address(expr.op0())+tmp1.front(); + { + auto base=evaluate_address(expr.op0(), fail_quietly); + if(!base.is_zero()) + return base+tmp1.front(); + } } else if(expr.id()==ID_member) { @@ -502,12 +1148,44 @@ mp_integer interpretert::evaluate_address(const exprt &expr) const offset+=get_size(comp.type()); } - return evaluate_address(expr.op0())+offset; + auto base=evaluate_address(expr.op0(), fail_quietly); + if(!base.is_zero()) + return base+offset; } - - std::cout << "!! failed to evaluate address: " + else if(expr.id()==ID_byte_extract_little_endian || + expr.id()==ID_byte_extract_big_endian) + { + if(expr.operands().size()!=2) + throw "byte_extract should have two operands"; + mp_vectort extract_offset; + evaluate(expr.op1(), extract_offset); + mp_vectort extract_from; + evaluate(expr.op0(), extract_from); + if(extract_offset.size()==1 && !extract_from.empty()) + { + mp_integer memory_offset; + if(!byte_offset_to_memory_offset(expr.op0().type(), + extract_offset[0], memory_offset)) + return evaluate_address(expr.op0(), fail_quietly)+memory_offset; + } + } + else if(expr.id()==ID_if) + { + mp_vectort result; + if_exprt address_cond( + expr.op0(), + address_of_exprt(expr.op1()), + address_of_exprt(expr.op2())); + evaluate(address_cond, result); + if(result.size()==1) + return result[0]; + } + if(!fail_quietly) + { + error() << "!! failed to evaluate address: " << from_expr(ns, function->first, expr) - << std::endl; + << eom; + } return 0; }