From 1aede785e785904825ff3e62aa432b9e0ce585d4 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Sat, 31 Dec 2016 15:27:04 +0300 Subject: [PATCH 01/15] Preprocessing of goto programs for string refinement --- .../string_refine_preprocess.cpp | 1289 +++++++++++++++++ src/goto-programs/string_refine_preprocess.h | 215 +++ src/util/std_expr.h | 13 + 3 files changed, 1517 insertions(+) create mode 100644 src/goto-programs/string_refine_preprocess.cpp create mode 100644 src/goto-programs/string_refine_preprocess.h diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp new file mode 100644 index 00000000000..c4dc1a92960 --- /dev/null +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -0,0 +1,1289 @@ +/*******************************************************************\ + +Module: Preprocess a goto-programs so that calls to the java String + library are recognized by the string solver + +Author: Romain Brenguier + +Date: September 2016 + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +// TODO: refined_string_type should be moved to util +#include +#include + +#include "string_refine_preprocess.h" + +/******************************************************************* \ + +Function: string_refine_preprocesst::fresh_array + + Inputs: + type - an array type + location - a location in the program + + Outputs: a new symbol + + Purpose: add a symbol with static lifetime and name containing + `cprover_string_array` and given type + +\*******************************************************************/ + +symbol_exprt string_refine_preprocesst::fresh_array( + const typet &type, const source_locationt &location) +{ + symbolt array_symbol=get_fresh_aux_symbol( + type, + "cprover_string_array", + "cprover_string_array", + location, + ID_java, + symbol_table); + array_symbol.is_static_lifetime=true; + return array_symbol.symbol_expr(); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::fresh_string + + Inputs: + type - a type for refined strings + location - a location in the program + + Outputs: a new symbol + + Purpose: add a symbol with static lifetime and name containing + `cprover_string` and given type + +\*******************************************************************/ + +symbol_exprt string_refine_preprocesst::fresh_string( + const typet &type, const source_locationt &location) +{ + symbolt array_symbol=get_fresh_aux_symbol( + type, "cprover_string", "cprover_string", location, ID_java, symbol_table); + array_symbol.is_static_lifetime=true; + return array_symbol.symbol_expr(); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::declare_function + + Inputs: a name and a type + + Purpose: declare a function with the given name and type + +\*******************************************************************/ + +void string_refine_preprocesst::declare_function( + irep_idt function_name, const typet &type) +{ + auxiliary_symbolt func_symbol; + func_symbol.base_name=function_name; + func_symbol.is_static_lifetime=false; + func_symbol.mode=ID_java; + func_symbol.name=function_name; + func_symbol.type=type; + symbol_table.add(func_symbol); + goto_functions.function_map[function_name]; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::get_data_and_length_type_of_string + + Inputs: an expression, a reference to a data type and a reference to a + length type + + Purpose: assuming the expression is a java string, figure out what + the types for length and data are and put them into the references + given as argument + +\*******************************************************************/ + +void string_refine_preprocesst::get_data_and_length_type_of_string( + const exprt &expr, typet &data_type, typet &length_type) +{ + assert(refined_string_typet::is_java_string_type(expr.type()) || + refined_string_typet::is_java_string_builder_type(expr.type())); + typet object_type=ns.follow(expr.type()); + const struct_typet &struct_type=to_struct_type(object_type); + for(const auto &component : struct_type.components()) + if(component.get_name()=="length") + length_type=component.type(); + else if(component.get_name()=="data") + data_type=component.type(); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_cprover_string_assign + + Inputs: a goto_program, a position in this program, an expression and a + location + + Outputs: an expression + + Purpose: Introduce a temporary variable for cprover strings; + returns the cprover_string corresponding to rhs if it is a string + pointer and the original rhs otherwise. + +\*******************************************************************/ + +exprt string_refine_preprocesst::make_cprover_string_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location) +{ + if(refined_string_typet::is_java_string_pointer_type(rhs.type())) + { + auto pair=java_to_cprover_strings.insert( + std::pair(rhs, nil_exprt())); + + if(pair.second) + { + // We do the following assignments: + // cprover_string_array = *(rhs->data) + // cprover_string = { rhs->length; cprover_string_array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + + typet data_type, length_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + member_exprt length(deref, "length", length_type); + symbol_exprt array_lhs=fresh_array(data_type.subtype(), location); + + // string expression for the rhs of the second assignment + string_exprt new_rhs( + length, array_lhs, refined_string_typet(length_type, data_type)); + + member_exprt data(deref, "data", data_type); + dereference_exprt deref_data(data, data_type.subtype()); + symbol_exprt lhs=fresh_string(new_rhs.type(), location); + + std::list assignments; + assignments.emplace_back(array_lhs, deref_data); + assignments.emplace_back(lhs, new_rhs); + insert_assignments( + goto_program, target, target->function, location, assignments); + target=goto_program.insert_after(target); + pair.first->second=lhs; + } + return pair.first->second; + } + else if(rhs.id()==ID_typecast && + refined_string_typet::is_java_string_pointer_type(rhs.op0().type())) + { + exprt new_rhs=make_cprover_string_assign( + goto_program, target, rhs.op0(), location); + return typecast_exprt(new_rhs, rhs.type()); + } + else + return rhs; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_normal_assign + + Inputs: a goto_program, a position in this program, an expression lhs, + a function type, a function name, a vector of arguments, a location + and a signature + + Purpose: replace the current instruction by: + > lhs=function_name(arguments) : return_type @ location + If given, signature can force String conversion of given arguments. + The convention for signature is one character by argument + and 'S' denotes string. + +\*******************************************************************/ + +void string_refine_preprocesst::make_normal_assign( + goto_programt &goto_program, + goto_programt::targett target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + if(function_name==ID_cprover_string_copy_func) + { + assert(!arguments.empty()); + make_string_copy(goto_program, target, lhs, arguments[0], location); + } + else + { + function_application_exprt rhs( + symbol_exprt(function_name), function_type.return_type()); + rhs.add_source_location()=location; + declare_function(function_name, function_type); + + exprt::operandst processed_arguments=process_arguments( + goto_program, target, arguments, location, signature); + rhs.arguments()=processed_arguments; + + code_assignt assignment(lhs, rhs); + assignment.add_source_location()=location; + target->make_assignment(); + target->code=assignment; + } +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::insert_assignments + + Inputs: a goto_program, a position in this program, a list of assignments + + Purpose: add the assignments to the program in the order they are given + +\*******************************************************************/ + +void string_refine_preprocesst::insert_assignments( + goto_programt &goto_program, + goto_programt::targett &target, + irep_idt function, + source_locationt location, + const std::list &va) +{ + if(va.empty()) + return; + + auto i=va.begin(); + target->make_assignment(); + target->code=*i; + target->function=function; + target->source_location=location; + for(i++; i!=va.end(); i++) + { + target=goto_program.insert_after(target); + target->make_assignment(); + target->code=*i; + target->function=function; + target->source_location=location; + } +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_assign + + Inputs: a goto_program, a position in this program, an expression lhs, + a function type, a function name, a vector of arguments, a location + and a signature + + Purpose: replace the current instruction by: + > lhs=malloc(String *) + > lhs->length=function_name_length(arguments) + > tmp_data=function_name_data(arguments) + > lhs->data=&tmp_data + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + assert(refined_string_typet::is_java_string_pointer_type( + function_type.return_type())); + dereference_exprt deref(lhs, lhs.type().subtype()); + typet object_type=ns.follow(deref.type()); + exprt object_size=size_of_expr(object_type, ns); + typet length_type, data_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + + std::string fnamel=id2string(function_name)+"_length"; + std::string fnamed=id2string(function_name)+"_data"; + declare_function(fnamel, length_type); + declare_function(fnamed, data_type); + function_application_exprt rhs_length(symbol_exprt(fnamel), length_type); + function_application_exprt rhs_data( + symbol_exprt(fnamed), data_type.subtype()); + + exprt::operandst processed_arguments=process_arguments( + goto_program, target, arguments, location, signature); + rhs_length.arguments()=processed_arguments; + rhs_data.arguments()=processed_arguments; + + symbolt sym_length=get_fresh_aux_symbol( + length_type, "length", "length", location, ID_java, symbol_table); + symbol_exprt tmp_length=sym_length.symbol_expr(); + symbol_exprt tmp_array=fresh_array(data_type.subtype(), location); + member_exprt lhs_length(deref, "length", length_type); + member_exprt lhs_data(deref, "data", tmp_array.type()); + + // lhs=malloc(String *) + assert(object_size.is_not_nil()); // got nil object_size + side_effect_exprt malloc_expr(ID_malloc); + malloc_expr.copy_to_operands(object_size); + malloc_expr.type()=pointer_typet(object_type); + malloc_expr.add_source_location()=location; + + std::list assigns; + assigns.emplace_back(lhs, malloc_expr); + assigns.emplace_back(tmp_length, rhs_length); + assigns.emplace_back(lhs_length, tmp_length); + assigns.emplace_back(tmp_array, rhs_data); + assigns.emplace_back(lhs_data, address_of_exprt(tmp_array)); + insert_assignments(goto_program, target, target->function, location, assigns); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_assign + + Inputs: a goto_program, a position in this program, an expression lhs, + a function type, a function name, a vector of arguments, a location + and a signature + + Purpose: assign the result of the function application to lhs, + in case the function type is string, it does a special assignment + using `make_string_assign` + +\*******************************************************************/ + +void string_refine_preprocesst::make_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arg, + const source_locationt &loc, + const std::string &sig) +{ + if(refined_string_typet::is_java_string_pointer_type( + function_type.return_type())) + make_string_assign( + goto_program, target, lhs, function_type, function_name, arg, loc, sig); + else + make_normal_assign( + goto_program, target, lhs, function_type, function_name, arg, loc, sig); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_copy + + Inputs: a goto_program, a position in this program, a lhs expression, + an argument expression and a location + + Outputs: an expression + + Purpose: replace the current instruction by: + > lhs->length=argument->length + > tmp_data=*(argument->data) + > lhs->data=&tmp_data + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_copy( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const exprt &argument, + const source_locationt &location) +{ + // TODO : treat CharSequence and StringBuffer + assert(refined_string_typet::is_java_string_pointer_type(lhs.type()) || + refined_string_typet::is_java_string_builder_pointer_type(lhs.type())); + exprt deref=dereference_exprt(lhs, lhs.type().subtype()); + + typet length_type, data_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + + dereference_exprt deref_arg(argument, argument.type().subtype()); + std::list assignments; + + exprt lhs_length=get_length(deref, length_type); + exprt rhs_length=get_length(deref_arg, length_type); + assignments.emplace_back(lhs_length, rhs_length); + + symbol_exprt tmp_data=fresh_array(data_type.subtype(), location); + exprt rhs_data=get_data(deref_arg, data_type); + exprt lhs_data=get_data(deref, data_type); + assignments.emplace_back( + tmp_data, dereference_exprt(rhs_data, data_type.subtype())); + assignments.emplace_back(lhs_data, address_of_exprt(tmp_data)); + + insert_assignments( + goto_program, target, target->function, location, assignments); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_function + + Inputs: a position in a goto program, a function name, an expression lhs, + a function type, name, arguments, a location and a signature string + + Purpose: at the current position replace `lhs=s.some_function(x,...)` + by `lhs=function_name(s,x,...)`; + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_function( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + if(refined_string_typet::is_java_string_pointer_type( + function_type.return_type())) + make_string_assign( + goto_program, + target, + lhs, + function_type, + function_name, + arguments, + location, + signature); + else + make_normal_assign( + goto_program, + target, + lhs, + function_type, + function_name, + arguments, + location, + signature); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_function + + Inputs: a position in a goto program, a function name and two Boolean options + + Purpose: at the current position replace `lhs=s.some_function(x,...)` + by `lhs=function_name(s,x,...)`; + option `assign_first_arg` uses `s` instead of `lhs` in the resulting + expression; + option `skip_first_arg`, removes `s` from the arguments, ie `x` is + the first one + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_function( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature, + bool assign_first_arg, + bool skip_first_arg) +{ + code_function_callt &function_call=to_code_function_call(target->code); + code_typet function_type=to_code_type(function_call.function().type()); + code_typet new_type; + const source_locationt &loc=function_call.source_location(); + declare_function(function_name, function_type); + function_application_exprt rhs; + std::vector args; + if(assign_first_arg) + { + assert(!function_call.arguments().empty()); + rhs.type()=function_call.arguments()[0].type(); + } + else + rhs.type()=function_type.return_type(); + rhs.add_source_location()=function_call.source_location(); + rhs.function()=symbol_exprt(function_name); + + std::size_t start_index=skip_first_arg?1:0; + for(std::size_t i=start_index; icode); + assert(!function_call.arguments().empty()); + string_builders[function_call.lhs()]=function_call.arguments()[0]; + make_string_function( + goto_program, target, function_name, signature, true, false); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::build_function_application + + Inputs: a function name, a type, a location and a vector of arguments + + Outputs: a function application expression + + Purpose: declare a function and construct an function application expression + with the given function name, type, location and arguments + +\*******************************************************************/ + +function_application_exprt + string_refine_preprocesst::build_function_application( + const irep_idt &function_name, + const typet &type, + const source_locationt &location, + const exprt::operandst &arguments) +{ + declare_function(function_name, type); + function_application_exprt function_app(symbol_exprt(function_name), type); + function_app.add_source_location()=location; + for(const auto &arg : arguments) + function_app.arguments().push_back(arg); + + return function_app; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_to_char_array_function + + Inputs: a goto program and a position in that goto program + + Purpose: at the given position replace `return_tmp0=s.toCharArray()` with: + > return_tmp0->data=&((s->data)[0]) + > return_tmp0->length=s->length + +\*******************************************************************/ + +void string_refine_preprocesst::make_to_char_array_function( + goto_programt &goto_program, goto_programt::targett &target) +{ + const code_function_callt &function_call=to_code_function_call(target->code); + + assert(!function_call.arguments().empty()); + const exprt &string_argument=function_call.arguments()[0]; + assert(refined_string_typet::is_java_string_pointer_type( + string_argument.type())); + + dereference_exprt deref(string_argument, string_argument.type().subtype()); + typet length_type, data_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + std::list assignments; + + // &((s->data)[0]) + exprt rhs_data=get_data(deref, data_type); + dereference_exprt rhs_array(rhs_data, data_type.subtype()); + exprt first_index=from_integer(0, java_int_type()); + index_exprt first_element(rhs_array, first_index, java_char_type()); + address_of_exprt rhs_pointer(first_element); + + // return_tmp0->data=&((s->data)[0]) + typet deref_type=function_call.lhs().type().subtype(); + dereference_exprt deref_lhs(function_call.lhs(), deref_type); + exprt lhs_data=get_data(deref_lhs, data_type); + assignments.emplace_back(lhs_data, rhs_pointer); + + // return_tmp0->length=s->length + exprt rhs_length=get_length(deref, length_type); + exprt lhs_length=get_length(deref_lhs, length_type); + assignments.emplace_back(lhs_length, rhs_length); + source_locationt location=target->source_location; + insert_assignments( + goto_program, target, target->function, location, assignments); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_cprover_char_array_assign + + Inputs: a goto_program, a position in this program, an expression and a + location + + Outputs: a char array expression (not a pointer) + + Purpose: Introduce a temporary variable for cprover strings; + returns the cprover_string corresponding to rhs + +\*******************************************************************/ + +exprt string_refine_preprocesst::make_cprover_char_array_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location) +{ + // TODO : add an assertion on the type of rhs + + // We do the following assignments: + // cprover_string_array = rhs.data + // cprover_string = { rhs.length; cprover_string_array } + + // string expression for the rhs of the second assignment + string_exprt new_rhs(java_char_type()); + + typet data_type=new_rhs.content().type(); + typet length_type=java_int_type(); + + symbol_exprt array_lhs=fresh_array(data_type, location); + exprt array_rhs=get_data(rhs, new_rhs.content().type()); + symbol_exprt lhs=fresh_string(new_rhs.type(), location); + new_rhs.length()=get_length(rhs, length_type); + new_rhs.content()=array_lhs; + + std::list assignments; + assignments.emplace_back(array_lhs, array_rhs); + assignments.emplace_back(lhs, new_rhs); + insert_assignments( + goto_program, target, target->function, location, assignments); + target=goto_program.insert_after(target); + return lhs; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_char_array_function + + Inputs: a position in a goto program, a function name, two Boolean options, + and the index of the char array argument in the function + + Purpose: at the given position replace + `lhs=s.some_function(...,char_array,...)` by + > cprover_string = { char_array->length, *char_array } + > tmp_string=function_name(s, cprover_string, ...) + option `assign_first_arg` uses `s` instead of `lhs` in the second + assignment; + option `skip_first_arg`, removes `s` from the arguments, ie `x` is + the first one; + argument index gives the index of the argument containing char_array + +\*******************************************************************/ + +void string_refine_preprocesst::make_char_array_function( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature, + std::size_t index, + bool assign_first_arg, + bool skip_first_arg) +{ + code_function_callt &function_call=to_code_function_call(target->code); + code_typet function_type=to_code_type(function_call.function().type()); + code_typet new_function_type; + const source_locationt &location=function_call.source_location(); + assert(!function_call.arguments().size()>index); + const std::vector &args=function_call.arguments(); + std::vector new_args; + + exprt lhs; + if(assign_first_arg) + { + assert(!function_call.arguments().empty()); + lhs=function_call.arguments()[0]; + else + lhs=function_call.lhs(); + + if(lhs.id()==ID_typecast) + lhs=to_typecast_expr(lhs).op(); + + exprt char_array=dereference_exprt( + function_call.arguments()[index], + function_call.arguments()[index].type().subtype()); + exprt string=make_cprover_char_array_assign( + goto_program, target, char_array, location); + + std::size_t start_index=skip_first_arg?1:0; + for(std::size_t i=start_index; icode); + assert(!function_call.arguments().empty()); + string_builders[function_call.lhs()]=function_call.arguments()[0]; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::process_arguments + + Inputs: a goto program, a position, a list of expressions, a location and a + signature + + Outputs: a list of expressions + + Purpose: for each expression that is a string or that is at a position with + an 'S' character in the signature, we declare a new `cprover_string` + whose contents is deduced from the expression and replace the + expression by this cprover_string in the output list; + in the other case the expression is kept as is for the output list. + +\*******************************************************************/ + +exprt::operandst string_refine_preprocesst::process_arguments( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + exprt::operandst new_arguments; + + for(std::size_t i=0; isecond); + else + { + if(isecond; + else + return ""; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::replace_string_calls + + Inputs: a function in a goto_program + + Purpose: goes through the instructions, replace function calls to string + function by equivalent instructions using functions defined + for the string solver, replace string literals by string + expressions of the type used by the string solver + TODO: the current implementation is only for java functions, + we should add support for other languages + +\*******************************************************************/ + +void string_refine_preprocesst::replace_string_calls( + goto_functionst::function_mapt::iterator f_it) +{ + goto_programt &goto_program=f_it->second.body; + + Forall_goto_program_instructions(target, goto_program) + { + if(target->is_function_call()) + { + code_function_callt &function_call=to_code_function_call(target->code); + for(auto &arg : function_call.arguments()) + { + auto sb_it=string_builders.find(arg); + if(sb_it!=string_builders.end()) + arg=sb_it->second; + } + + if(function_call.function().id()==ID_symbol) + { + const irep_idt &function_id= + to_symbol_expr(function_call.function()).get_identifier(); + std::string signature=function_signature(function_id); + + auto it=string_functions.find(function_id); + if(it!=string_functions.end()) + make_string_function( + goto_program, target, it->second, signature, false, false); + + it=side_effect_functions.find(function_id); + if(it!=side_effect_functions.end()) + make_string_function_side_effect( + goto_program, target, it->second, signature); + + it=string_function_calls.find(function_id); + if(it!=string_function_calls.end()) + make_string_function_call( + goto_program, target, it->second, signature); + + it=string_of_char_array_functions.find(function_id); + if(it!=string_of_char_array_functions.end()) + make_char_array_function( + goto_program, target, it->second, signature, 0); + + it=string_of_char_array_function_calls.find(function_id); + if(it!=string_of_char_array_function_calls.end()) + make_char_array_function_call( + goto_program, target, it->second, signature); + + it=side_effect_char_array_functions.find(function_id); + if(it!=side_effect_char_array_functions.end()) + make_char_array_side_effect( + goto_program, target, it->second, signature); + + if(function_id==irep_idt("java::java.lang.String.toCharArray:()[C")) + make_to_char_array_function(goto_program, target); + } + } + else + { + if(target->is_assign()) + { + // In assignments we replace string literals and C string functions + code_assignt assignment=to_code_assign(target->code); + + exprt new_rhs=assignment.rhs(); + code_assignt new_assignment(assignment.lhs(), new_rhs); + + if(new_rhs.id()==ID_function_application) + { + function_application_exprt f=to_function_application_expr(new_rhs); + const exprt &name=f.function(); + assert(name.id()==ID_symbol); + const irep_idt &id=to_symbol_expr(name).get_identifier(); + auto it=c_string_functions.find(id); + if(it!=c_string_functions.end()) + { + declare_function(it->second, f.type()); + f.function()=symbol_exprt(it->second); + new_assignment=code_assignt(assignment.lhs(), f); + } + } + + new_assignment.add_source_location()=assignment.source_location(); + target->make_assignment(); + target->code=new_assignment; + } + } + } + return; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::initialize_string_function_table + + Purpose: fill maps with correspondance from java method names to cprover + functions + +\*******************************************************************/ + +void string_refine_preprocesst::initialize_string_function_table() +{ + string_functions["java::java.lang.String.codePointAt:(I)I"]= + ID_cprover_string_code_point_at_func; + string_functions["java::java.lang.String.codePointBefore:(I)I"]= + ID_cprover_string_code_point_before_func; + string_functions["java::java.lang.String.codePointCount:(II)I"]= + ID_cprover_string_code_point_count_func; + string_functions["java::java.lang.String.offsetByCodePoints:(II)I"]= + ID_cprover_string_offset_by_code_point_func; + string_functions["java::java.lang.String.hashCode:()I"]= + ID_cprover_string_hash_code_func; + string_functions["java::java.lang.String.indexOf:(I)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.indexOf:(II)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.indexOf:(Ljava/lang/String;)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.indexOf:(Ljava/lang/String;I)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.lastIndexOf:(I)I"]= + ID_cprover_string_last_index_of_func; + string_functions["java::java.lang.String.lastIndexOf:(II)I"]= + ID_cprover_string_last_index_of_func; + string_functions + ["java::java.lang.String.lastIndexOf:(Ljava/lang/String;)I"]= + ID_cprover_string_last_index_of_func; + string_functions + ["java::java.lang.String.lastIndexOf:(Ljava/lang/String;I)I"]= + ID_cprover_string_last_index_of_func; + string_functions + ["java::java.lang.String.concat:(Ljava/lang/String;)Ljava/lang/String;"]= + ID_cprover_string_concat_func; + string_functions["java::java.lang.String.length:()I"]= + ID_cprover_string_length_func; + string_functions["java::java.lang.StringBuilder.length:()I"]= + ID_cprover_string_length_func; + string_functions["java::java.lang.String.equals:(Ljava/lang/Object;)Z"]= + ID_cprover_string_equal_func; + string_functions + ["java::java.lang.String.equalsIgnoreCase:(Ljava/lang/String;)Z"]= + ID_cprover_string_equals_ignore_case_func; + string_functions["java::java.lang.String.startsWith:(Ljava/lang/String;)Z"]= + ID_cprover_string_startswith_func; + string_functions + ["java::java.lang.String.startsWith:(Ljava/lang/String;I)Z"]= + ID_cprover_string_startswith_func; + string_functions["java::java.lang.String.endsWith:(Ljava/lang/String;)Z"]= + ID_cprover_string_endswith_func; + string_functions["java::java.lang.String.substring:(II)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions["java::java.lang.String.substring:(II)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions["java::java.lang.String.substring:(I)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions + ["java::java.lang.StringBuilder.substring:(II)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions + ["java::java.lang.StringBuilder.substring:(I)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions + ["java::java.lang.String.subSequence:(II)Ljava/lang/CharSequence;"]= + ID_cprover_string_substring_func; + string_functions["java::java.lang.String.trim:()Ljava/lang/String;"]= + ID_cprover_string_trim_func; + string_functions["java::java.lang.String.toLowerCase:()Ljava/lang/String;"]= + ID_cprover_string_to_lower_case_func; + string_functions["java::java.lang.String.toUpperCase:()Ljava/lang/String;"]= + ID_cprover_string_to_upper_case_func; + string_functions["java::java.lang.String.replace:(CC)Ljava/lang/String;"]= + ID_cprover_string_replace_func; + string_functions + ["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= + ID_cprover_string_contains_func; + string_functions["java::java.lang.String.compareTo:(Ljava/lang/String;)I"]= + ID_cprover_string_compare_to_func; + string_functions["java::java.lang.String.intern:()Ljava/lang/String;"]= + ID_cprover_string_intern_func; + string_functions["java::java.lang.String.isEmpty:()Z"]= + ID_cprover_string_is_empty_func; + string_functions["java::java.lang.String.charAt:(I)C"]= + ID_cprover_string_char_at_func; + string_functions["java::java.lang.StringBuilder.charAt:(I)C"]= + ID_cprover_string_char_at_func; + string_functions["java::java.lang.CharSequence.charAt:(I)C"]= + ID_cprover_string_char_at_func; + string_functions + ["java::java.lang.StringBuilder.toString:()Ljava/lang/String;"]= + ID_cprover_string_copy_func; + + string_functions["java::java.lang.String.valueOf:(F)Ljava/lang/String;"]= + ID_cprover_string_of_float_func; + string_functions["java::java.lang.Float.toString:(F)Ljava/lang/String;"]= + ID_cprover_string_of_float_func; + string_functions["java::java.lang.Integer.toString:(I)Ljava/lang/String;"]= + ID_cprover_string_of_int_func; + string_functions["java::java.lang.String.valueOf:(I)Ljava/lang/String;"]= + ID_cprover_string_of_int_func; + string_functions["java::java.lang.Integer.toHexString:(I)Ljava/lang/String;"]= + ID_cprover_string_of_int_hex_func; + string_functions["java::java.lang.String.valueOf:(L)Ljava/lang/String;"]= + ID_cprover_string_of_long_func; + string_functions["java::java.lang.String.valueOf:(D)Ljava/lang/String;"]= + ID_cprover_string_of_double_func; + string_functions["java::java.lang.String.valueOf:(Z)Ljava/lang/String;"]= + ID_cprover_string_of_bool_func; + string_functions["java::java.lang.String.valueOf:(C)Ljava/lang/String;"]= + ID_cprover_string_of_char_func; + string_functions["java::java.lang.Integer.parseInt:(Ljava/lang/String;)I"]= + ID_cprover_string_parse_int_func; + + side_effect_functions + ["java::java.lang.StringBuilder.append:(Ljava/lang/String;)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_func; + side_effect_functions["java::java.lang.StringBuilder.setCharAt:(IC)V"]= + ID_cprover_string_char_set_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(I)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_int_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(J)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_long_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(Z)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_bool_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(C)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_char_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(D)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_double_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(F)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_float_func; + side_effect_functions + ["java::java.lang.StringBuilder.appendCodePoint:(I)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_code_point_func; + side_effect_functions + ["java::java.lang.StringBuilder.delete:(II)Ljava/lang/StringBuilder;"]= + ID_cprover_string_delete_func; + side_effect_functions + ["java::java.lang.StringBuilder.deleteCharAt:(I)Ljava/lang/StringBuilder;"]= + ID_cprover_string_delete_char_at_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(II)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_int_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(IJ)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_long_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(IC)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_char_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(IZ)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_bool_func; + side_effect_functions + ["java::java.lang.StringBuilder.setLength:(I)V"]= + ID_cprover_string_set_length_func; + + + side_effect_char_array_functions + ["java::java.lang.StringBuilder.insert:(I[CII)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_char_array_func; + side_effect_char_array_functions + ["java::java.lang.StringBuilder.insert:(I[C)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_char_array_func; + + string_function_calls + ["java::java.lang.String.:(Ljava/lang/String;)V"]= + ID_cprover_string_copy_func; + string_function_calls + ["java::java.lang.String.:(Ljava/lang/StringBuilder;)V"]= + ID_cprover_string_copy_func; + string_function_calls + ["java::java.lang.StringBuilder.:(Ljava/lang/String;)V"]= + ID_cprover_string_copy_func; + string_function_calls["java::java.lang.String.:()V"]= + ID_cprover_string_empty_string_func; + string_function_calls["java::java.lang.StringBuilder.:()V"]= + ID_cprover_string_empty_string_func; + + string_of_char_array_function_calls["java::java.lang.String.:([C)V"]= + ID_cprover_string_of_char_array_func; + string_of_char_array_function_calls["java::java.lang.String.:([CII)V"]= + ID_cprover_string_of_char_array_func; + + string_of_char_array_functions + ["java::java.lang.String.valueOf:([CII)Ljava/lang/String;"]= + ID_cprover_string_of_char_array_func; + string_of_char_array_functions + ["java::java.lang.String.valueOf:([C)Ljava/lang/String;"]= + ID_cprover_string_of_char_array_func; + string_of_char_array_functions + ["java::java.lang.String.copyValueOf:([CII)Ljava/lang/String;"]= + ID_cprover_string_of_char_array_func; + string_of_char_array_functions + ["java::java.lang.String.copyValueOf:([C)Ljava/lang/String;"]= + ID_cprover_string_of_char_array_func; + + c_string_functions["__CPROVER_uninterpreted_string_literal_func"]= + ID_cprover_string_literal_func; + c_string_functions["__CPROVER_uninterpreted_string_char_at_func"]= + ID_cprover_string_char_at_func; + c_string_functions["__CPROVER_uninterpreted_string_equal_func"]= + ID_cprover_string_equal_func; + c_string_functions["__CPROVER_uninterpreted_string_concat_func"]= + ID_cprover_string_concat_func; + c_string_functions["__CPROVER_uninterpreted_string_length_func"]= + ID_cprover_string_length_func; + c_string_functions["__CPROVER_uninterpreted_string_substring_func"]= + ID_cprover_string_substring_func; + c_string_functions["__CPROVER_uninterpreted_string_is_prefix_func"]= + ID_cprover_string_is_prefix_func; + c_string_functions["__CPROVER_uninterpreted_string_is_suffix_func"]= + ID_cprover_string_is_suffix_func; + c_string_functions["__CPROVER_uninterpreted_string_contains_func"]= + ID_cprover_string_contains_func; + c_string_functions["__CPROVER_uninterpreted_string_index_of_func"]= + ID_cprover_string_index_of_func; + c_string_functions["__CPROVER_uninterpreted_string_last_index_of_func"]= + ID_cprover_string_last_index_of_func; + c_string_functions["__CPROVER_uninterpreted_string_char_set_func"]= + ID_cprover_string_char_set_func; + c_string_functions["__CPROVER_uninterpreted_string_copy_func"]= + ID_cprover_string_copy_func; + c_string_functions["__CPROVER_uninterpreted_string_parse_int_func"]= + ID_cprover_string_parse_int_func; + c_string_functions["__CPROVER_uninterpreted_string_of_int_func"]= + ID_cprover_string_of_int_func; + + signatures["java::java.lang.String.equals:(Ljava/lang/Object;)Z"]="SSZ"; + signatures + ["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= + "SSZ"; +} + +/*******************************************************************\ + +Constructor: string_refine_preprocesst::string_refine_preprocesst + + Inputs: a symbol table, goto functions, a message handler + + Purpose: process the goto function by replacing calls to string functions + +\*******************************************************************/ + +string_refine_preprocesst::string_refine_preprocesst( + symbol_tablet &_symbol_table, + goto_functionst &_goto_functions, + message_handlert &_message_handler): + messaget(_message_handler), + ns(_symbol_table), + symbol_table(_symbol_table), + goto_functions(_goto_functions), + next_symbol_id(0), + jls_ptr(symbol_typet("java::java.lang.String")) +{ + initialize_string_function_table(); + Forall_goto_functions(it, goto_functions) + replace_string_calls(it); +} diff --git a/src/goto-programs/string_refine_preprocess.h b/src/goto-programs/string_refine_preprocess.h new file mode 100644 index 00000000000..37e178f799a --- /dev/null +++ b/src/goto-programs/string_refine_preprocess.h @@ -0,0 +1,215 @@ +/*******************************************************************\ + +Module: Preprocess a goto-programs so that calls to the java String + library are recognized by the PASS algorithm + +Author: Romain Brenguier + +Date: September 2016 + +\*******************************************************************/ + +#ifndef CPROVER_GOTO_PROGRAMS_STRING_REFINE_PREPROCESS_H +#define CPROVER_GOTO_PROGRAMS_STRING_REFINE_PREPROCESS_H + +#include +#include + +class string_refine_preprocesst:public messaget +{ + public: + string_refine_preprocesst( + symbol_tablet &, goto_functionst &, message_handlert &); + + private: + namespacet ns; + symbol_tablet & symbol_table; + goto_functionst & goto_functions; + + typedef std::unordered_map id_mapt; + typedef std::unordered_map expr_mapt; + + // String builders maps the different names of a same StringBuilder object + // to a unique expression. + expr_mapt string_builders; + + // Map name of Java string functions to there equivalent in the solver + id_mapt side_effect_functions; + id_mapt string_functions; + id_mapt c_string_functions; + id_mapt string_function_calls; + id_mapt string_of_char_array_functions; + id_mapt string_of_char_array_function_calls; + id_mapt side_effect_char_array_functions; + + std::unordered_map signatures; + expr_mapt hidden_strings; + expr_mapt java_to_cprover_strings; + + // unique id for each newly created symbols + int next_symbol_id; + + void initialize_string_function_table(); + + symbol_exprt fresh_array( + const typet &type, const source_locationt &location); + symbol_exprt fresh_string( + const typet &type, const source_locationt &location); + + // get the data member of a java string + static exprt get_data(const exprt &string, const typet &data_type) + { + return member_exprt(string, "data", data_type); + } + + // get the length member of a java string + exprt get_length(const exprt &string, const typet &length_type) + { + return member_exprt(string, "length", length_type); + } + + // type of pointers to string + pointer_typet jls_ptr; + exprt replace_string(const exprt &in); + exprt replace_string_in_assign(const exprt &in); + + void insert_assignments( + goto_programt &goto_program, + goto_programt::targett &target, + irep_idt function, + source_locationt location, + const std::list &va); + + exprt replace_string_pointer(const exprt &in); + + // Replace string builders by expression of the mapping and make + // assignments for strings as string_exprt + exprt::operandst process_arguments( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature=""); + + // Signature of the named function if it is defined in the signature map, + // empty string otherwise + std::string function_signature(const irep_idt &function_id); + + void declare_function(irep_idt function_name, const typet &type); + + void get_data_and_length_type_of_string( + const exprt &expr, typet &data_type, typet &length_type); + + function_application_exprt build_function_application( + const irep_idt &function_name, + const typet &type, + const source_locationt &location, + const exprt::operandst &arguments); + + void make_normal_assign( + goto_programt &goto_program, + goto_programt::targett target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature=""); + + void make_string_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature); + + void make_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arg, + const source_locationt &loc, + const std::string &sig); + + exprt make_cprover_string_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location); + + void make_string_copy( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const exprt &argument, + const source_locationt &location); + + void make_string_function( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature, + bool assign_first_arg=false, + bool skip_first_arg=false); + + void make_string_function( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature); + + void make_string_function_call( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature); + + void make_string_function_side_effect( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature); + + void make_to_char_array_function( + goto_programt &goto_program, goto_programt::targett &); + + exprt make_cprover_char_array_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location); + + void make_char_array_function( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature, + std::size_t index, + bool assign_first_arg=false, + bool skip_first_arg=false); + + void make_char_array_function_call( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature); + + void make_char_array_side_effect( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature); + + void replace_string_calls(goto_functionst::function_mapt::iterator f_it); +}; + +#endif diff --git a/src/util/std_expr.h b/src/util/std_expr.h index a2f78ab25e8..5c5856b4a9d 100644 --- a/src/util/std_expr.h +++ b/src/util/std_expr.h @@ -3450,6 +3450,19 @@ class function_application_exprt:public exprt operands().resize(2); } + explicit function_application_exprt(const typet &_type): + exprt(ID_function_application, _type) + { + operands().resize(2); + } + + function_application_exprt( + const symbol_exprt &_function, const typet &_type): + function_application_exprt(_type) // NOLINT(runtime/explicit) + { + function()=_function; + } + exprt &function() { return op0(); From 3a2e210d482e568f6114108ce28bf14aa43569ab Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 17 Feb 2017 13:10:40 +0000 Subject: [PATCH 02/15] Moving is_java_string_type functions to string_preprocess This avoid dependencies between goto-programs and solver. We also removed function is_unrefined_string_type which should not be needed in the string solver. Removed also mention of java string types and unrefined types in the solver. These mentions should not be necessary, since we are not supposed to do anything java specific. The solver should only have to deal with refined string types and possibly char arrays. --- .../string_refine_preprocess.cpp | 146 ++++++++++++++++-- src/goto-programs/string_refine_preprocess.h | 10 ++ .../refinement/refined_string_type.cpp | 95 ------------ src/solvers/refinement/refined_string_type.h | 36 +---- .../string_constraint_generator_constants.cpp | 2 +- .../string_constraint_generator_indexof.cpp | 20 ++- .../string_constraint_generator_main.cpp | 4 +- src/solvers/refinement/string_refinement.cpp | 6 +- 8 files changed, 166 insertions(+), 153 deletions(-) diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index c4dc1a92960..b14153c26c1 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -22,7 +22,125 @@ Date: September 2016 #include "string_refine_preprocess.h" -/******************************************************************* \ +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string pointers + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_pointer_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_string_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_type(const typet &type) +{ + if(type.id()==ID_symbol) + { + const irep_idt &tag=to_symbol_type(type).get_identifier(); + return tag=="java::java.lang.String"; + } + else if(type.id()==ID_struct) + { + const irep_idt &tag=to_struct_type(type).get_tag(); + return tag=="java.lang.String"; + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_builder_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string builder + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_builder_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + if(subtype.id()==ID_struct) + { + const irep_idt &tag=to_struct_type(subtype).get_tag(); + return tag=="java.lang.StringBuilder"; + } + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_builder_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java StringBuilder + pointers + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_builder_pointer_type( + const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_string_builder_type(subtype); + } + return false; +} +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_sequence_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java char sequence + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_sequence_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + if(subtype.id()==ID_struct) + { + const irep_idt &tag=to_struct_type(subtype).get_tag(); + return tag=="java.lang.CharSequence"; + } + } + return false; +} + +/*******************************************************************\ Function: string_refine_preprocesst::fresh_array @@ -114,8 +232,8 @@ Function: string_refine_preprocesst::get_data_and_length_type_of_string void string_refine_preprocesst::get_data_and_length_type_of_string( const exprt &expr, typet &data_type, typet &length_type) { - assert(refined_string_typet::is_java_string_type(expr.type()) || - refined_string_typet::is_java_string_builder_type(expr.type())); + assert(is_java_string_type(expr.type()) || + is_java_string_builder_type(expr.type())); typet object_type=ns.follow(expr.type()); const struct_typet &struct_type=to_struct_type(object_type); for(const auto &component : struct_type.components()) @@ -146,7 +264,7 @@ exprt string_refine_preprocesst::make_cprover_string_assign( const exprt &rhs, const source_locationt &location) { - if(refined_string_typet::is_java_string_pointer_type(rhs.type())) + if(is_java_string_pointer_type(rhs.type())) { auto pair=java_to_cprover_strings.insert( std::pair(rhs, nil_exprt())); @@ -183,7 +301,7 @@ exprt string_refine_preprocesst::make_cprover_string_assign( return pair.first->second; } else if(rhs.id()==ID_typecast && - refined_string_typet::is_java_string_pointer_type(rhs.op0().type())) + is_java_string_pointer_type(rhs.op0().type())) { exprt new_rhs=make_cprover_string_assign( goto_program, target, rhs.op0(), location); @@ -303,8 +421,7 @@ void string_refine_preprocesst::make_string_assign( const source_locationt &location, const std::string &signature) { - assert(refined_string_typet::is_java_string_pointer_type( - function_type.return_type())); + assert(is_java_string_pointer_type(function_type.return_type())); dereference_exprt deref(lhs, lhs.type().subtype()); typet object_type=ns.follow(deref.type()); exprt object_size=size_of_expr(object_type, ns); @@ -371,8 +488,7 @@ void string_refine_preprocesst::make_assign( const source_locationt &loc, const std::string &sig) { - if(refined_string_typet::is_java_string_pointer_type( - function_type.return_type())) + if(is_java_string_pointer_type(function_type.return_type())) make_string_assign( goto_program, target, lhs, function_type, function_name, arg, loc, sig); else @@ -404,8 +520,8 @@ void string_refine_preprocesst::make_string_copy( const source_locationt &location) { // TODO : treat CharSequence and StringBuffer - assert(refined_string_typet::is_java_string_pointer_type(lhs.type()) || - refined_string_typet::is_java_string_builder_pointer_type(lhs.type())); + assert(is_java_string_pointer_type(lhs.type()) || + is_java_string_builder_pointer_type(lhs.type())); exprt deref=dereference_exprt(lhs, lhs.type().subtype()); typet length_type, data_type; @@ -451,8 +567,7 @@ void string_refine_preprocesst::make_string_function( const source_locationt &location, const std::string &signature) { - if(refined_string_typet::is_java_string_pointer_type( - function_type.return_type())) + if(is_java_string_pointer_type(function_type.return_type())) make_string_assign( goto_program, target, @@ -633,8 +748,7 @@ void string_refine_preprocesst::make_to_char_array_function( assert(!function_call.arguments().empty()); const exprt &string_argument=function_call.arguments()[0]; - assert(refined_string_typet::is_java_string_pointer_type( - string_argument.type())); + assert(is_java_string_pointer_type(string_argument.type())); dereference_exprt deref(string_argument, string_argument.type().subtype()); typet length_type, data_type; @@ -870,7 +984,7 @@ exprt::operandst string_refine_preprocesst::process_arguments( { while(arg.id()==ID_typecast) arg=arg.op0(); - if(!refined_string_typet::is_java_string_type(arg.type())) + if(!is_java_string_type(arg.type())) arg=typecast_exprt(arg, jls_ptr); } exprt arg2=make_cprover_string_assign( diff --git a/src/goto-programs/string_refine_preprocess.h b/src/goto-programs/string_refine_preprocess.h index 37e178f799a..613d0161926 100644 --- a/src/goto-programs/string_refine_preprocess.h +++ b/src/goto-programs/string_refine_preprocess.h @@ -51,6 +51,16 @@ class string_refine_preprocesst:public messaget void initialize_string_function_table(); + static bool is_java_string_pointer_type(const typet &type); + + static bool is_java_string_type(const typet &type); + + static bool is_java_string_builder_type(const typet &type); + + static bool is_java_string_builder_pointer_type(const typet &type); + + static bool is_java_char_sequence_type(const typet &type); + symbol_exprt fresh_array( const typet &type, const source_locationt &location); symbol_exprt fresh_string( diff --git a/src/solvers/refinement/refined_string_type.cpp b/src/solvers/refinement/refined_string_type.cpp index 012d30eb8de..3362c71e0db 100644 --- a/src/solvers/refinement/refined_string_type.cpp +++ b/src/solvers/refinement/refined_string_type.cpp @@ -48,98 +48,3 @@ bool refined_string_typet::is_c_string_type(const typet &type) to_struct_type(type).get_tag()==CPROVER_PREFIX"string"; } -/*******************************************************************\ - -Function: refined_string_typet::is_java_string_pointer_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java string pointers - -\*******************************************************************/ - -bool refined_string_typet::is_java_string_pointer_type(const typet &type) -{ - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - return is_java_string_type(subtype); - } - return false; -} - -/*******************************************************************\ - -Function: refined_string_typet::is_java_string_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java string - -\*******************************************************************/ - -bool refined_string_typet::is_java_string_type(const typet &type) -{ - if(type.id()==ID_symbol) - { - irep_idt tag=to_symbol_type(type).get_identifier(); - return tag=="java::java.lang.String"; - } - else if(type.id()==ID_struct) - { - irep_idt tag=to_struct_type(type).get_tag(); - return tag=="java.lang.String"; - } - return false; -} - -/*******************************************************************\ - -Function: refined_string_typet::is_java_string_builder_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java string builder - -\*******************************************************************/ - -bool refined_string_typet::is_java_string_builder_type(const typet &type) -{ - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - irep_idt tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.StringBuilder"; - } - } - return false; -} - -/*******************************************************************\ - -Function: refined_string_typet::is_java_char_sequence_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java char sequence - -\*******************************************************************/ - -bool refined_string_typet::is_java_char_sequence_type(const typet &type) -{ - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.CharSequence"; - } - } - return false; -} diff --git a/src/solvers/refinement/refined_string_type.h b/src/solvers/refinement/refined_string_type.h index 3ecc0ac3a9f..3b36b19e826 100644 --- a/src/solvers/refinement/refined_string_type.h +++ b/src/solvers/refinement/refined_string_type.h @@ -33,41 +33,20 @@ class refined_string_typet: public struct_typet const typet &get_char_type() const { - assert(components().size()==2); - return components()[0].type(); + return get_content_type().subtype(); } const typet &get_index_type() const { - return get_content_type().size().type(); + assert(components().size()==2); + return components()[0].type(); } - // For C the unrefined string type is __CPROVER_string, for java it is a - // pointer to a struct with tag java.lang.String + // For C the unrefined string type is __CPROVER_string static bool is_c_string_type(const typet &type); - static bool is_java_string_pointer_type(const typet &type); - - static bool is_java_string_type(const typet &type); - - static bool is_java_string_builder_type(const typet &type); - - static bool is_java_char_sequence_type(const typet &type); - - static bool is_unrefined_string_type(const typet &type) - { - return ( - is_c_string_type(type) || - is_java_string_pointer_type(type) || - is_java_string_builder_type(type) || - is_java_char_sequence_type(type)); - } - - static bool is_unrefined_string(const exprt &expr) - { - return (is_unrefined_string_type(expr.type())); - } + static bool is_refined_string_type(const typet &type); constant_exprt index_of_int(int i) const { @@ -75,9 +54,10 @@ class refined_string_typet: public struct_typet } }; -const refined_string_typet &to_refined_string_type(const typet &type) +extern inline const refined_string_typet &to_refined_string_type( + const typet &type) { - assert(type.id()==ID_struct); + assert(refined_string_typet::is_refined_string_type(type)); return static_cast(type); } diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index 90769747949..64ffabfd266 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -132,7 +132,7 @@ string_exprt string_constraint_generatort::add_axioms_from_literal( else { // Java string constant - assert(refined_string_typet::is_unrefined_string_type(arg.type())); + assert(arg.id()==ID_symbol); const exprt &s=arg.op0(); // It seems the value of the string is lost, diff --git a/src/solvers/refinement/string_constraint_generator_indexof.cpp b/src/solvers/refinement/string_constraint_generator_indexof.cpp index 93db048458b..283de683bd6 100644 --- a/src/solvers/refinement/string_constraint_generator_indexof.cpp +++ b/src/solvers/refinement/string_constraint_generator_indexof.cpp @@ -186,14 +186,16 @@ exprt string_constraint_generatort::add_axioms_for_index_of( else assert(false); - if(refined_string_typet::is_java_string_pointer_type(c.type())) + if(c.type().id()==ID_unsignedbv) + { + return add_axioms_for_index_of( + str, typecast_exprt(c, ref_type.get_char_type()), from_index); + } + else { string_exprt sub=add_axioms_for_string_expr(c); return add_axioms_for_index_of_string(str, sub, from_index); } - else - return add_axioms_for_index_of( - str, typecast_exprt(c, ref_type.get_char_type()), from_index); } exprt string_constraint_generatort::add_axioms_for_last_index_of( @@ -280,12 +282,14 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( else assert(false); - if(refined_string_typet::is_java_string_pointer_type(c.type())) + if(c.type().id()==ID_unsignedbv) + { + return add_axioms_for_last_index_of( + str, typecast_exprt(c, ref_type.get_char_type()), from_index); + } + else { string_exprt sub=add_axioms_for_string_expr(c); return add_axioms_for_last_index_of_string(str, sub, from_index); } - else - return add_axioms_for_last_index_of( - str, typecast_exprt(c, ref_type.get_char_type()), from_index); } diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 5a269e5ad3b..0179a2fcb61 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -215,10 +215,10 @@ string_exprt string_constraint_generatort::add_axioms_for_if( const if_exprt &expr) { assert( - refined_string_typet::is_unrefined_string_type(expr.true_case().type())); + refined_string_typet::is_c_string_type(expr.true_case().type())); string_exprt t=add_axioms_for_string_expr(expr.true_case()); assert( - refined_string_typet::is_unrefined_string_type(expr.false_case().type())); + refined_string_typet::is_c_string_type(expr.false_case().type())); string_exprt f=add_axioms_for_string_expr(expr.false_case()); const refined_string_typet &ref_type=to_refined_string_type(t.type()); const typet &index_type=ref_type.get_index_type(); diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 16b89bc43c8..db9a2c460cc 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -146,7 +146,7 @@ bvt string_refinementt::convert_symbol(const exprt &expr) const irep_idt &identifier=expr.get(ID_identifier); assert(!identifier.empty()); - if(refined_string_typet::is_unrefined_string_type(type)) + if(refined_string_typet::is_refined_string_type(type)) { string_exprt str= generator.find_or_add_string_of_symbol(to_symbol_expr(expr)); @@ -216,7 +216,7 @@ bool string_refinementt::boolbv_set_equality_to_true(const equal_exprt &expr) if(expr.rhs().id()==ID_typecast) { exprt uncast=to_typecast_expr(expr.rhs()).op(); - if(refined_string_typet::is_unrefined_string_type(uncast.type())) + if(refined_string_typet::is_refined_string_type(uncast.type())) { debug() << "(sr) detected casted string" << eom; symbol_exprt sym=to_symbol_expr(expr.lhs()); @@ -225,7 +225,7 @@ bool string_refinementt::boolbv_set_equality_to_true(const equal_exprt &expr) } } - if(refined_string_typet::is_unrefined_string_type(type)) + if(refined_string_typet::is_refined_string_type(type)) { symbol_exprt sym=to_symbol_expr(expr.lhs()); generator.set_string_symbol_equal_to_expr(sym, expr.rhs()); From 70af9ce6148e03ed7c2179642486651db06ff47f Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 24 Feb 2017 11:02:55 +0000 Subject: [PATCH 03/15] Moving refined_string_type to util This is a more appropriate location for this module since it's used both in the preprocessing of goto-programs and the string solver. --- src/goto-programs/string_refine_preprocess.cpp | 3 +-- src/solvers/refinement/string_constraint.h | 2 +- src/solvers/refinement/string_constraint_generator.h | 2 +- src/{solvers/refinement => util}/refined_string_type.cpp | 0 src/{solvers/refinement => util}/refined_string_type.h | 4 ++-- 5 files changed, 5 insertions(+), 6 deletions(-) rename src/{solvers/refinement => util}/refined_string_type.cpp (100%) rename src/{solvers/refinement => util}/refined_string_type.h (93%) diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index b14153c26c1..ec02988d577 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -14,10 +14,9 @@ Date: September 2016 #include #include #include +#include #include #include -// TODO: refined_string_type should be moved to util -#include #include #include "string_refine_preprocess.h" diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h index 61a6f3ebdd6..4b8d1229e09 100644 --- a/src/solvers/refinement/string_constraint.h +++ b/src/solvers/refinement/string_constraint.h @@ -15,7 +15,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include -#include +#include class string_constraintt: public exprt { diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 2a56f09fddd..ed898cb9a18 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -14,7 +14,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H #include -#include +#include #include class string_constraint_generatort diff --git a/src/solvers/refinement/refined_string_type.cpp b/src/util/refined_string_type.cpp similarity index 100% rename from src/solvers/refinement/refined_string_type.cpp rename to src/util/refined_string_type.cpp diff --git a/src/solvers/refinement/refined_string_type.h b/src/util/refined_string_type.h similarity index 93% rename from src/solvers/refinement/refined_string_type.h rename to src/util/refined_string_type.h index 3b36b19e826..477b00139ed 100644 --- a/src/solvers/refinement/refined_string_type.h +++ b/src/util/refined_string_type.h @@ -10,8 +10,8 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com \*******************************************************************/ -#ifndef CPROVER_SOLVERS_REFINEMENT_REFINED_STRING_TYPE_H -#define CPROVER_SOLVERS_REFINEMENT_REFINED_STRING_TYPE_H +#ifndef CPROVER_UTIL_REFINED_STRING_TYPE_H +#define CPROVER_UTIL_REFINED_STRING_TYPE_H #include #include From 2929d7f04a77c3a805c61e09d19c1a9d99423411 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 27 Feb 2017 11:14:11 +0000 Subject: [PATCH 04/15] Removed distinction between c string type and refined string type --- src/util/refined_string_type.cpp | 9 +++++---- src/util/refined_string_type.h | 4 ---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/util/refined_string_type.cpp b/src/util/refined_string_type.cpp index 3362c71e0db..0625149cf83 100644 --- a/src/util/refined_string_type.cpp +++ b/src/util/refined_string_type.cpp @@ -29,22 +29,23 @@ refined_string_typet::refined_string_typet( array_typet char_array(char_type, infinite_index); components().emplace_back("length", index_type); components().emplace_back("content", char_array); + set_tag(CPROVER_PREFIX"refined_string_type"); } /*******************************************************************\ -Function: refined_string_typet::is_c_string_type +Function: refined_string_typet::is_refined_string_type Inputs: a type - Outputs: Boolean telling whether the type is that of C strings + Outputs: Boolean telling whether the input is a refined string type \*******************************************************************/ -bool refined_string_typet::is_c_string_type(const typet &type) +bool refined_string_typet::is_refined_string_type(const typet &type) { return type.id()==ID_struct && - to_struct_type(type).get_tag()==CPROVER_PREFIX"string"; + to_struct_type(type).get_tag()==CPROVER_PREFIX"refined_string_type"; } diff --git a/src/util/refined_string_type.h b/src/util/refined_string_type.h index 477b00139ed..a0cb7500e7f 100644 --- a/src/util/refined_string_type.h +++ b/src/util/refined_string_type.h @@ -42,10 +42,6 @@ class refined_string_typet: public struct_typet return components()[0].type(); } - // For C the unrefined string type is __CPROVER_string - - static bool is_c_string_type(const typet &type); - static bool is_refined_string_type(const typet &type); constant_exprt index_of_int(int i) const From 5cb0e5e09eac930280c6aa53a0c6be0410700820 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 27 Feb 2017 11:14:21 +0000 Subject: [PATCH 05/15] Removed mention of c string type Refined string type should be used instead as we are trying to be language independent. --- src/solvers/refinement/string_constraint_generator_main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 0179a2fcb61..7fe510c2e35 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -215,10 +215,10 @@ string_exprt string_constraint_generatort::add_axioms_for_if( const if_exprt &expr) { assert( - refined_string_typet::is_c_string_type(expr.true_case().type())); + refined_string_typet::is_refined_string_type(expr.true_case().type())); string_exprt t=add_axioms_for_string_expr(expr.true_case()); assert( - refined_string_typet::is_c_string_type(expr.false_case().type())); + refined_string_typet::is_refined_string_type(expr.false_case().type())); string_exprt f=add_axioms_for_string_expr(expr.false_case()); const refined_string_typet &ref_type=to_refined_string_type(t.type()); const typet &index_type=ref_type.get_index_type(); From 32cecfba886909ae1c272b1a6780f3ae958023b5 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 7 Feb 2017 11:11:47 +0000 Subject: [PATCH 06/15] Making add_string_type more generic The class_name is now passed as argument so that we can reuse this function for StringBuilder and other "String-like" classes. --- .../java_bytecode_convert_class.cpp | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index f21b0bb1ce2..7f52c6ef77b 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -50,7 +50,16 @@ class java_bytecode_convert_classt:public messaget convert(parse_tree.parsed_class); else if(string_refinement_enabled && parse_tree.parsed_class.name=="java.lang.String") - add_string_type(); + add_string_type("java.lang.String"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.StringBuilder") + add_string_type("java.lang.StringBuilder"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.CharSequence") + add_string_type("java.lang.CharSequence"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.StringBuffer") + add_string_type("java.lang.StringBuffer"); else generate_class_stub(parse_tree.parsed_class.name); } @@ -72,7 +81,7 @@ class java_bytecode_convert_classt:public messaget void generate_class_stub(const irep_idt &class_name); void add_array_types(); - void add_string_type(); + void add_string_type(const irep_idt &class_name); }; /*******************************************************************\ @@ -406,15 +415,17 @@ bool java_bytecode_convert_class( Function: java_bytecode_convert_classt::add_string_type + Inputs: a name for the class such as "java.lang.String" + Purpose: Implements the java.lang.String type in the case that we provide an internal implementation. \*******************************************************************/ -void java_bytecode_convert_classt::add_string_type() +void java_bytecode_convert_classt::add_string_type(const irep_idt &class_name) { class_typet string_type; - string_type.set_tag("java.lang.String"); + string_type.set_tag(class_name); string_type.components().resize(3); string_type.components()[0].set_name("@java.lang.Object"); string_type.components()[0].set_pretty_name("@java.lang.Object"); @@ -432,8 +443,8 @@ void java_bytecode_convert_classt::add_string_type() string_type.add_base(symbol_typet("java::java.lang.Object")); symbolt string_symbol; - string_symbol.name="java::java.lang.String"; - string_symbol.base_name="java.lang.String"; + string_symbol.name="java::"+id2string(class_name); + string_symbol.base_name=id2string(class_name); string_symbol.type=string_type; string_symbol.is_type=true; @@ -445,8 +456,8 @@ void java_bytecode_convert_classt::add_string_type() symbolt string_equals_symbol; string_equals_symbol.name= "java::java.lang.String.equals:(Ljava/lang/Object;)Z"; - string_equals_symbol.base_name="java.lang.String.equals"; - string_equals_symbol.pretty_name="java.lang.String.equals"; + string_equals_symbol.base_name=id2string(class_name)+".equals"; + string_equals_symbol.pretty_name=id2string(class_name)+".equals"; string_equals_symbol.mode=ID_java; code_typet string_equals_type; From c97d82ff561c55079ae9639c26100112d8e40059 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 9 Mar 2017 15:02:08 +0000 Subject: [PATCH 07/15] new identifier for converting char pointer to array --- src/util/irep_ids.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util/irep_ids.txt b/src/util/irep_ids.txt index 066a5f831cc..470a0f01c78 100644 --- a/src/util/irep_ids.txt +++ b/src/util/irep_ids.txt @@ -743,6 +743,7 @@ string_constraint string_not_contains_constraint cprover_char_literal_func cprover_string_literal_func +cprover_string_array_of_char_pointer_func cprover_string_char_at_func cprover_string_char_set_func cprover_string_code_point_at_func From 37650c9cfb24bc59c87e202433045015f5fed305 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 17 Feb 2017 13:18:54 +0000 Subject: [PATCH 08/15] Many corrections in preprocessing of strings Better signatures and string detection Better detection of string-like types and handling of char arrays Changing the way we deal with string initialisation from array Factorized part of type recognition and added StringBuilder and CharSequence to the list of java classes that should be considered as string. Changed the way we deal with StringBuilders: instead of having a map we add an assignment to the instructions. We also detect char arrays and handle them better. We now use substring and copy functions for initialisation from char array since the argument are always transformed into refined strings. For each string returned by a string function we also add into the java_string_to_cprover_string map a string_exprt. Corrected detection of typecast in make_cprover_string_assign Ensuring refined_string arguments of function applications are string_exprt Correct string_refine_preprocesst constructor. Order of initialisation is now the same as the order of declaration. This was picked up by g++ and clang. Added signatures for some StringBuilder functions. Removed map java_to_cprover_string and adapt signature for side effects. The usage of a map is not correct since strings can be modified by side effects. Signature is necessary for StringBuilders to be assigned in the right way with methods with side effects. Assign all string_exprt to refined string symbols in preprocessing. This makes it then easier to debug and to find witnesses for counter examples in the solver. Make signatures take priority over actual type and add signature for intern. Linting corrections Adding malloc for char arrays for String.toCharArray Fixing preprocessing of string function with side effect This fixes problems we were getting with some StringBuilder functions. The return value should contain a pointer to the original StringBuilder and the fields of the StringBuilder should be filled with the result of the function call. Corrected mistake in preprocessing of string functions with side effects char array assignements returns a string_exprt This is to be uniform with other preprocessing functions also returning string_exprt Preprocessing for StringBuilder.append on char array Corrected update of signature and commented out some unused code Corrected the initialization from char array We cannot use the substring function because the meaning of the third argument is different between `String.([CII)` and `String.substring(SII)` Using new id for conversion between char pointer and array Cleaning of preprocessing for strings Removed useless functions and merged some maps Make a copy of the function call to be modified Removed redeclaration of location --- .../string_refine_preprocess.cpp | 797 +++++++++--------- src/goto-programs/string_refine_preprocess.h | 71 +- 2 files changed, 416 insertions(+), 452 deletions(-) diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index ec02988d577..8403d4bf044 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -23,6 +23,32 @@ Date: September 2016 /*******************************************************************\ +Function: string_refine_preprocesst::check_java_type + + Inputs: a type and a string + + Outputs: Boolean telling whether the type is a struct with the given + tag or a symbolic type with the tag prefixed by "java::" + +\*******************************************************************/ + +bool string_refine_preprocesst::check_java_type( + const typet &type, const std::string &tag) +{ + if(type.id()==ID_symbol) + { + irep_idt tag_id=to_symbol_type(type).get_identifier(); + return tag_id=="java::"+tag; + } + else if(type.id()==ID_struct) + { + irep_idt tag_id=to_struct_type(type).get_tag(); + return tag_id==tag; + } + return false; +} +/*******************************************************************\ + Function: string_refine_preprocesst::is_java_string_pointer_type Inputs: a type @@ -54,17 +80,7 @@ Function: string_refine_preprocesst::is_java_string_type bool string_refine_preprocesst::is_java_string_type(const typet &type) { - if(type.id()==ID_symbol) - { - const irep_idt &tag=to_symbol_type(type).get_identifier(); - return tag=="java::java.lang.String"; - } - else if(type.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(type).get_tag(); - return tag=="java.lang.String"; - } - return false; + return check_java_type(type, "java.lang.String"); } /*******************************************************************\ @@ -79,17 +95,7 @@ Function: string_refine_preprocesst::is_java_string_builder_type bool string_refine_preprocesst::is_java_string_builder_type(const typet &type) { - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.StringBuilder"; - } - } - return false; + return check_java_type(type, "java.lang.StringBuilder"); } /*******************************************************************\ @@ -114,6 +120,7 @@ bool string_refine_preprocesst::is_java_string_builder_pointer_type( } return false; } + /*******************************************************************\ Function: string_refine_preprocesst::is_java_char_sequence_type @@ -125,16 +132,67 @@ Function: string_refine_preprocesst::is_java_char_sequence_type \*******************************************************************/ bool string_refine_preprocesst::is_java_char_sequence_type(const typet &type) +{ + return check_java_type(type, "java.lang.CharSequence"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_sequence_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of a pointer + to a java char sequence + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_sequence_pointer_type( + const typet &type) { if(type.id()==ID_pointer) { const pointer_typet &pt=to_pointer_type(type); const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.CharSequence"; - } + return is_java_char_sequence_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_array_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java char array + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_array_type(const typet &type) +{ + return check_java_type(type, "array[char]"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_array_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of a pointer + to a java char array + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_array_pointer_type( + const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_char_array_type(subtype); } return false; } @@ -217,6 +275,32 @@ void string_refine_preprocesst::declare_function( /*******************************************************************\ +Function: string_refine_preprocesst::get_data_and_length_type_of_char_array + + Inputs: an expression, a reference to a data type and a reference to a + length type + + Purpose: assuming the expression is a char array, figure out what + the types for length and data are and put them into the references + given as argument + +\*******************************************************************/ + +void string_refine_preprocesst::get_data_and_length_type_of_char_array( + const exprt &expr, typet &data_type, typet &length_type) +{ + typet object_type=ns.follow(expr.type()); + assert(object_type.id()==ID_struct); + const struct_typet &struct_type=to_struct_type(object_type); + for(auto component : struct_type.components()) + if(component.get_name()=="length") + length_type=component.type(); + else if(component.get_name()=="data") + data_type=component.type(); +} + +/*******************************************************************\ + Function: string_refine_preprocesst::get_data_and_length_type_of_string Inputs: an expression, a reference to a data type and a reference to a @@ -232,7 +316,8 @@ void string_refine_preprocesst::get_data_and_length_type_of_string( const exprt &expr, typet &data_type, typet &length_type) { assert(is_java_string_type(expr.type()) || - is_java_string_builder_type(expr.type())); + is_java_string_builder_type(expr.type()) || + is_java_char_sequence_type(expr.type())); typet object_type=ns.follow(expr.type()); const struct_typet &struct_type=to_struct_type(object_type); for(const auto &component : struct_type.components()) @@ -263,44 +348,43 @@ exprt string_refine_preprocesst::make_cprover_string_assign( const exprt &rhs, const source_locationt &location) { - if(is_java_string_pointer_type(rhs.type())) + if(implements_java_char_sequence(rhs.type())) { - auto pair=java_to_cprover_strings.insert( - std::pair(rhs, nil_exprt())); - - if(pair.second) - { - // We do the following assignments: - // cprover_string_array = *(rhs->data) - // cprover_string = { rhs->length; cprover_string_array } - - dereference_exprt deref(rhs, rhs.type().subtype()); - - typet data_type, length_type; - get_data_and_length_type_of_string(deref, data_type, length_type); - member_exprt length(deref, "length", length_type); - symbol_exprt array_lhs=fresh_array(data_type.subtype(), location); - - // string expression for the rhs of the second assignment - string_exprt new_rhs( - length, array_lhs, refined_string_typet(length_type, data_type)); - - member_exprt data(deref, "data", data_type); - dereference_exprt deref_data(data, data_type.subtype()); - symbol_exprt lhs=fresh_string(new_rhs.type(), location); - - std::list assignments; - assignments.emplace_back(array_lhs, deref_data); - assignments.emplace_back(lhs, new_rhs); - insert_assignments( - goto_program, target, target->function, location, assignments); - target=goto_program.insert_after(target); - pair.first->second=lhs; - } - return pair.first->second; + // We do the following assignments: + // 1 length= *(rhs->length) + // 2 cprover_string_array = *(rhs->data) + // 3 cprover_string = { length; cprover_string_array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + typet data_type, length_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + std::list assignments; + + // 1) cprover_string_length= *(rhs->length) + member_exprt length(deref, "length", length_type); + + // 2) cprover_string_array = *(rhs->data) + symbol_exprt array_lhs=fresh_array(data_type.subtype(), location); + member_exprt data(deref, "data", data_type); + dereference_exprt deref_data(data, data_type.subtype()); + assignments.emplace_back(array_lhs, deref_data); + + // 3) cprover_string = { cprover_string_length; cprover_string_array } + // This assignment is useful for finding witnessing strings for counter + // examples + refined_string_typet ref_type(length_type, java_char_type()); + string_exprt new_rhs(length, array_lhs, ref_type); + + symbol_exprt lhs=fresh_string(new_rhs.type(), location); + assignments.emplace_back(lhs, new_rhs); + + insert_assignments( + goto_program, target, target->function, location, assignments); + target=goto_program.insert_after(target); + return new_rhs; } else if(rhs.id()==ID_typecast && - is_java_string_pointer_type(rhs.op0().type())) + implements_java_char_sequence(rhs.op0().type())) { exprt new_rhs=make_cprover_string_assign( goto_program, target, rhs.op0(), location); @@ -312,6 +396,75 @@ exprt string_refine_preprocesst::make_cprover_string_assign( /*******************************************************************\ +Function: string_refine_preprocesst::make_cprover_char_array_assign + + Inputs: a goto_program, a position in this program, an expression of + type char array pointer and a location + + Outputs: a string expression + + Purpose: Introduce a temporary variable for cprover strings; + returns the cprover_string corresponding to rhs + +\*******************************************************************/ + +string_exprt string_refine_preprocesst::make_cprover_char_array_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location) +{ + assert(is_java_char_array_pointer_type(rhs.type())); + + // We do the following assignments: + // deref=*(rhs->data) + // array= typecast(&deref); + // string={ rhs->length; array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + typet length_type, data_type; + get_data_and_length_type_of_char_array(deref, data_type, length_type); + assert(data_type.id()==ID_pointer); + typet char_type=to_pointer_type(data_type).subtype(); + + refined_string_typet ref_type(length_type, java_char_type()); + typet content_type=ref_type.get_content_type(); + std::list assignments; + + // deref=*(rhs->data) + member_exprt array_rhs(deref, "data", data_type); + dereference_exprt deref_array(array_rhs, data_type.subtype()); + symbolt sym_lhs_deref=get_fresh_aux_symbol( + data_type.subtype(), + "char_array_assign$deref", + "char_array_assign$deref", + location, + ID_java, + symbol_table); + symbol_exprt lhs_deref=sym_lhs_deref.symbol_expr(); + assignments.emplace_back(lhs_deref, deref_array); + + // array=convert_pointer_to_char_array(*rhs->data) + declare_function(ID_cprover_string_array_of_char_pointer_func, content_type); + function_application_exprt fun_app(symbol_exprt( + ID_cprover_string_array_of_char_pointer_func), content_type); + fun_app.arguments().push_back(deref_array); + symbol_exprt array=fresh_array(content_type, location); + assignments.emplace_back(array, fun_app); + + // string={ rhs->length; string_array } + string_exprt new_rhs(get_length(deref, length_type), array, ref_type); + symbol_exprt lhs=fresh_string(ref_type, location); + assignments.emplace_back(lhs, new_rhs); + + insert_assignments( + goto_program, target, target->function, location, assignments); + target=goto_program.insert_after(target); + return new_rhs; +} + +/*******************************************************************\ + Function: string_refine_preprocesst::make_normal_assign Inputs: a goto_program, a position in this program, an expression lhs, @@ -336,27 +489,19 @@ void string_refine_preprocesst::make_normal_assign( const source_locationt &location, const std::string &signature) { - if(function_name==ID_cprover_string_copy_func) - { - assert(!arguments.empty()); - make_string_copy(goto_program, target, lhs, arguments[0], location); - } - else - { - function_application_exprt rhs( - symbol_exprt(function_name), function_type.return_type()); - rhs.add_source_location()=location; - declare_function(function_name, function_type); + function_application_exprt rhs( + symbol_exprt(function_name), function_type.return_type()); + rhs.add_source_location()=location; + declare_function(function_name, function_type); - exprt::operandst processed_arguments=process_arguments( - goto_program, target, arguments, location, signature); - rhs.arguments()=processed_arguments; + exprt::operandst processed_arguments=process_arguments( + goto_program, target, arguments, location, signature); + rhs.arguments()=processed_arguments; - code_assignt assignment(lhs, rhs); - assignment.add_source_location()=location; - target->make_assignment(); - target->code=assignment; - } + code_assignt assignment(lhs, rhs); + assignment.add_source_location()=location; + target->make_assignment(); + target->code=assignment; } /*******************************************************************\ @@ -420,7 +565,7 @@ void string_refine_preprocesst::make_string_assign( const source_locationt &location, const std::string &signature) { - assert(is_java_string_pointer_type(function_type.return_type())); + assert(implements_java_char_sequence(function_type.return_type())); dereference_exprt deref(lhs, lhs.type().subtype()); typet object_type=ns.follow(deref.type()); exprt object_size=size_of_expr(object_type, ns); @@ -454,98 +599,22 @@ void string_refine_preprocesst::make_string_assign( malloc_expr.type()=pointer_typet(object_type); malloc_expr.add_source_location()=location; + refined_string_typet ref_type(length_type, data_type.subtype().subtype()); + string_exprt str(tmp_length, tmp_array, ref_type); + symbol_exprt cprover_string_sym=fresh_string(ref_type, location); + std::list assigns; assigns.emplace_back(lhs, malloc_expr); assigns.emplace_back(tmp_length, rhs_length); assigns.emplace_back(lhs_length, tmp_length); assigns.emplace_back(tmp_array, rhs_data); + assigns.emplace_back(cprover_string_sym, str); assigns.emplace_back(lhs_data, address_of_exprt(tmp_array)); insert_assignments(goto_program, target, target->function, location, assigns); } /*******************************************************************\ -Function: string_refine_preprocesst::make_assign - - Inputs: a goto_program, a position in this program, an expression lhs, - a function type, a function name, a vector of arguments, a location - and a signature - - Purpose: assign the result of the function application to lhs, - in case the function type is string, it does a special assignment - using `make_string_assign` - -\*******************************************************************/ - -void string_refine_preprocesst::make_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &lhs, - const code_typet &function_type, - const irep_idt &function_name, - const exprt::operandst &arg, - const source_locationt &loc, - const std::string &sig) -{ - if(is_java_string_pointer_type(function_type.return_type())) - make_string_assign( - goto_program, target, lhs, function_type, function_name, arg, loc, sig); - else - make_normal_assign( - goto_program, target, lhs, function_type, function_name, arg, loc, sig); -} - -/*******************************************************************\ - -Function: string_refine_preprocesst::make_string_copy - - Inputs: a goto_program, a position in this program, a lhs expression, - an argument expression and a location - - Outputs: an expression - - Purpose: replace the current instruction by: - > lhs->length=argument->length - > tmp_data=*(argument->data) - > lhs->data=&tmp_data - -\*******************************************************************/ - -void string_refine_preprocesst::make_string_copy( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &lhs, - const exprt &argument, - const source_locationt &location) -{ - // TODO : treat CharSequence and StringBuffer - assert(is_java_string_pointer_type(lhs.type()) || - is_java_string_builder_pointer_type(lhs.type())); - exprt deref=dereference_exprt(lhs, lhs.type().subtype()); - - typet length_type, data_type; - get_data_and_length_type_of_string(deref, data_type, length_type); - - dereference_exprt deref_arg(argument, argument.type().subtype()); - std::list assignments; - - exprt lhs_length=get_length(deref, length_type); - exprt rhs_length=get_length(deref_arg, length_type); - assignments.emplace_back(lhs_length, rhs_length); - - symbol_exprt tmp_data=fresh_array(data_type.subtype(), location); - exprt rhs_data=get_data(deref_arg, data_type); - exprt lhs_data=get_data(deref, data_type); - assignments.emplace_back( - tmp_data, dereference_exprt(rhs_data, data_type.subtype())); - assignments.emplace_back(lhs_data, address_of_exprt(tmp_data)); - - insert_assignments( - goto_program, target, target->function, location, assignments); -} - -/*******************************************************************\ - Function: string_refine_preprocesst::make_string_function Inputs: a position in a goto program, a function name, an expression lhs, @@ -566,7 +635,36 @@ void string_refine_preprocesst::make_string_function( const source_locationt &location, const std::string &signature) { - if(is_java_string_pointer_type(function_type.return_type())) + if(signature.length()>0) + { + if(signature.back()=='S') + { + code_typet ft=function_type; + ft.return_type()=jls_ptr; + typecast_exprt lhs2(lhs, jls_ptr); + + make_string_assign( + goto_program, + target, + lhs2, + ft, + function_name, + arguments, + location, + signature); + } + else + make_normal_assign( + goto_program, + target, + lhs, + function_type, + function_name, + arguments, + location, + signature); + } + else if(implements_java_char_sequence(function_type.return_type())) make_string_assign( goto_program, target, @@ -597,9 +695,11 @@ Function: string_refine_preprocesst::make_string_function Purpose: at the current position replace `lhs=s.some_function(x,...)` by `lhs=function_name(s,x,...)`; option `assign_first_arg` uses `s` instead of `lhs` in the resulting - expression; + expression, Warning : it assumes that `s` is string-like option `skip_first_arg`, removes `s` from the arguments, ie `x` is - the first one + the first one; + arguments that are string (TODO: and char array) are replaced + by string_exprt \*******************************************************************/ @@ -635,11 +735,17 @@ void string_refine_preprocesst::make_string_function( new_type.parameters().push_back(function_type.parameters()[i]); } + std::string new_sig=signature; exprt lhs; if(assign_first_arg) { assert(!function_call.arguments().empty()); lhs=function_call.arguments()[0]; + std::size_t size=function_call.arguments().size(); + if(signature.length()<=size) + new_sig.resize(size+1, '_'); + + new_sig.replace(size, 1, "S"); } else lhs=function_call.lhs(); @@ -650,7 +756,7 @@ void string_refine_preprocesst::make_string_function( new_type.return_type()=lhs.type(); make_string_function( - goto_program, target, lhs, new_type, function_name, args, loc, signature); + goto_program, target, lhs, new_type, function_name, args, loc, new_sig); } /*******************************************************************\ @@ -680,9 +786,11 @@ Function: string_refine_preprocesst::make_string_function_side_effect Inputs: a position in a goto program and a function name - Purpose: at the current position, replace `r=s.some_function(x,...)` - by `s=function_name(s,x,...)` and add a correspondance from r - to s in the `string_builders` map + Purpose: at the current position, replace `r=s.some_function(x,...)` by + > tmp=function_name(x,...) + > s->data=tmp.data + > s->length=tmp.length + > r=s \*******************************************************************/ @@ -692,11 +800,48 @@ void string_refine_preprocesst::make_string_function_side_effect( const irep_idt &function_name, const std::string &signature) { - const code_function_callt function_call=to_code_function_call(target->code); - assert(!function_call.arguments().empty()); - string_builders[function_call.lhs()]=function_call.arguments()[0]; - make_string_function( - goto_program, target, function_name, signature, true, false); + code_function_callt function_call=to_code_function_call(target->code); + source_locationt loc=function_call.source_location(); + std::list assignments; + exprt lhs=function_call.lhs(); + exprt s=function_call.arguments()[0]; + code_typet function_type=to_code_type(function_call.type()); + + function_type.return_type()=s.type(); + + if(lhs.is_not_nil()) + { + symbol_exprt tmp_string=fresh_string(lhs.type(), loc); + + make_string_assign( + goto_program, + target, + tmp_string, + function_type, + function_name, + function_call.arguments(), + loc, + signature); + dereference_exprt deref_lhs(s, s.type().subtype()); + typet data_type, length_type; + get_data_and_length_type_of_string(deref_lhs, data_type, length_type); + member_exprt lhs_data(deref_lhs, "data", data_type); + member_exprt lhs_length(deref_lhs, "length", length_type); + dereference_exprt deref_rhs(tmp_string, s.type().subtype()); + member_exprt rhs_data(deref_rhs, "data", data_type); + member_exprt rhs_length(deref_rhs, "length", length_type); + assignments.emplace_back(lhs_length, rhs_length); + assignments.emplace_back(lhs_data, rhs_data); + assignments.emplace_back(lhs, s); + target=goto_program.insert_after(target); + insert_assignments( + goto_program, target, target->function, loc, assignments); + } + else + { + make_string_function( + goto_program, target, function_name, signature, true, false); + } } /*******************************************************************\ @@ -735,6 +880,7 @@ Function: string_refine_preprocesst::make_to_char_array_function Inputs: a goto program and a position in that goto program Purpose: at the given position replace `return_tmp0=s.toCharArray()` with: + > return_tmp0 = malloc(array[char]); > return_tmp0->data=&((s->data)[0]) > return_tmp0->length=s->length @@ -744,16 +890,36 @@ void string_refine_preprocesst::make_to_char_array_function( goto_programt &goto_program, goto_programt::targett &target) { const code_function_callt &function_call=to_code_function_call(target->code); + source_locationt location=function_call.source_location(); - assert(!function_call.arguments().empty()); + assert(function_call.arguments().size()>=1); const exprt &string_argument=function_call.arguments()[0]; assert(is_java_string_pointer_type(string_argument.type())); + typet deref_type=function_call.lhs().type().subtype(); + const exprt &lhs=function_call.lhs(); + dereference_exprt deref_lhs(lhs, deref_type); + dereference_exprt deref(string_argument, string_argument.type().subtype()); typet length_type, data_type; get_data_and_length_type_of_string(deref, data_type, length_type); std::list assignments; + // lhs=malloc(array[char]) + typet object_type=ns.follow(deref_type); + exprt object_size=size_of_expr(object_type, ns); + + if(object_size.is_nil()) + debug() << "string_refine_preprocesst::make_to_char_array_function " + << "got nil object_size" << eom; + + side_effect_exprt malloc_expr(ID_malloc); + malloc_expr.copy_to_operands(object_size); + malloc_expr.type()=pointer_typet(object_type); + malloc_expr.add_source_location()=location; + assignments.emplace_back(lhs, malloc_expr); + + // &((s->data)[0]) exprt rhs_data=get_data(deref, data_type); dereference_exprt rhs_array(rhs_data, data_type.subtype()); @@ -762,8 +928,6 @@ void string_refine_preprocesst::make_to_char_array_function( address_of_exprt rhs_pointer(first_element); // return_tmp0->data=&((s->data)[0]) - typet deref_type=function_call.lhs().type().subtype(); - dereference_exprt deref_lhs(function_call.lhs(), deref_type); exprt lhs_data=get_data(deref_lhs, data_type); assignments.emplace_back(lhs_data, rhs_pointer); @@ -771,178 +935,8 @@ void string_refine_preprocesst::make_to_char_array_function( exprt rhs_length=get_length(deref, length_type); exprt lhs_length=get_length(deref_lhs, length_type); assignments.emplace_back(lhs_length, rhs_length); - source_locationt location=target->source_location; - insert_assignments( - goto_program, target, target->function, location, assignments); -} - -/*******************************************************************\ - -Function: string_refine_preprocesst::make_cprover_char_array_assign - - Inputs: a goto_program, a position in this program, an expression and a - location - - Outputs: a char array expression (not a pointer) - - Purpose: Introduce a temporary variable for cprover strings; - returns the cprover_string corresponding to rhs - -\*******************************************************************/ - -exprt string_refine_preprocesst::make_cprover_char_array_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &rhs, - const source_locationt &location) -{ - // TODO : add an assertion on the type of rhs - - // We do the following assignments: - // cprover_string_array = rhs.data - // cprover_string = { rhs.length; cprover_string_array } - - // string expression for the rhs of the second assignment - string_exprt new_rhs(java_char_type()); - - typet data_type=new_rhs.content().type(); - typet length_type=java_int_type(); - - symbol_exprt array_lhs=fresh_array(data_type, location); - exprt array_rhs=get_data(rhs, new_rhs.content().type()); - symbol_exprt lhs=fresh_string(new_rhs.type(), location); - new_rhs.length()=get_length(rhs, length_type); - new_rhs.content()=array_lhs; - - std::list assignments; - assignments.emplace_back(array_lhs, array_rhs); - assignments.emplace_back(lhs, new_rhs); insert_assignments( goto_program, target, target->function, location, assignments); - target=goto_program.insert_after(target); - return lhs; -} - -/*******************************************************************\ - -Function: string_refine_preprocesst::make_char_array_function - - Inputs: a position in a goto program, a function name, two Boolean options, - and the index of the char array argument in the function - - Purpose: at the given position replace - `lhs=s.some_function(...,char_array,...)` by - > cprover_string = { char_array->length, *char_array } - > tmp_string=function_name(s, cprover_string, ...) - option `assign_first_arg` uses `s` instead of `lhs` in the second - assignment; - option `skip_first_arg`, removes `s` from the arguments, ie `x` is - the first one; - argument index gives the index of the argument containing char_array - -\*******************************************************************/ - -void string_refine_preprocesst::make_char_array_function( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature, - std::size_t index, - bool assign_first_arg, - bool skip_first_arg) -{ - code_function_callt &function_call=to_code_function_call(target->code); - code_typet function_type=to_code_type(function_call.function().type()); - code_typet new_function_type; - const source_locationt &location=function_call.source_location(); - assert(!function_call.arguments().size()>index); - const std::vector &args=function_call.arguments(); - std::vector new_args; - - exprt lhs; - if(assign_first_arg) - { - assert(!function_call.arguments().empty()); - lhs=function_call.arguments()[0]; - else - lhs=function_call.lhs(); - - if(lhs.id()==ID_typecast) - lhs=to_typecast_expr(lhs).op(); - - exprt char_array=dereference_exprt( - function_call.arguments()[index], - function_call.arguments()[index].type().subtype()); - exprt string=make_cprover_char_array_assign( - goto_program, target, char_array, location); - - std::size_t start_index=skip_first_arg?1:0; - for(std::size_t i=start_index; icode); - assert(!function_call.arguments().empty()); - string_builders[function_call.lhs()]=function_call.arguments()[0]; } /*******************************************************************\ @@ -974,22 +968,20 @@ exprt::operandst string_refine_preprocesst::process_arguments( for(std::size_t i=0; isecond); - else + if(iis_function_call()) { code_function_callt &function_call=to_code_function_call(target->code); - for(auto &arg : function_call.arguments()) - { - auto sb_it=string_builders.find(arg); - if(sb_it!=string_builders.end()) - arg=sb_it->second; - } if(function_call.function().id()==ID_symbol) { @@ -1070,21 +1056,6 @@ void string_refine_preprocesst::replace_string_calls( make_string_function_call( goto_program, target, it->second, signature); - it=string_of_char_array_functions.find(function_id); - if(it!=string_of_char_array_functions.end()) - make_char_array_function( - goto_program, target, it->second, signature, 0); - - it=string_of_char_array_function_calls.find(function_id); - if(it!=string_of_char_array_function_calls.end()) - make_char_array_function_call( - goto_program, target, it->second, signature); - - it=side_effect_char_array_functions.find(function_id); - if(it!=side_effect_char_array_functions.end()) - make_char_array_side_effect( - goto_program, target, it->second, signature); - if(function_id==irep_idt("java::java.lang.String.toCharArray:()[C")) make_to_char_array_function(goto_program, target); } @@ -1299,12 +1270,18 @@ void string_refine_preprocesst::initialize_string_function_table() ID_cprover_string_set_length_func; - side_effect_char_array_functions + + side_effect_functions + ["java::java.lang.StringBuilder.append:([C)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_func; + side_effect_functions ["java::java.lang.StringBuilder.insert:(I[CII)Ljava/lang/StringBuilder;"]= - ID_cprover_string_insert_char_array_func; - side_effect_char_array_functions + ID_cprover_string_insert_func; + side_effect_functions ["java::java.lang.StringBuilder.insert:(I[C)Ljava/lang/StringBuilder;"]= - ID_cprover_string_insert_char_array_func; + ID_cprover_string_insert_func; + // TODO clean irep ids from insert_char_array etc... string_function_calls ["java::java.lang.String.:(Ljava/lang/String;)V"]= @@ -1320,23 +1297,23 @@ void string_refine_preprocesst::initialize_string_function_table() string_function_calls["java::java.lang.StringBuilder.:()V"]= ID_cprover_string_empty_string_func; - string_of_char_array_function_calls["java::java.lang.String.:([C)V"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_function_calls["java::java.lang.String.:([CII)V"]= - ID_cprover_string_of_char_array_func; + string_function_calls["java::java.lang.String.:([C)V"]= + ID_cprover_string_copy_func; + string_function_calls["java::java.lang.String.:([CII)V"]= + ID_cprover_string_copy_func; - string_of_char_array_functions + string_functions ["java::java.lang.String.valueOf:([CII)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_functions + ID_cprover_string_copy_func; + string_functions ["java::java.lang.String.valueOf:([C)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_functions + ID_cprover_string_copy_func; + string_functions ["java::java.lang.String.copyValueOf:([CII)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_functions + ID_cprover_string_copy_func; + string_functions ["java::java.lang.String.copyValueOf:([C)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; + ID_cprover_string_copy_func; c_string_functions["__CPROVER_uninterpreted_string_literal_func"]= ID_cprover_string_literal_func; @@ -1370,9 +1347,23 @@ void string_refine_preprocesst::initialize_string_function_table() ID_cprover_string_of_int_func; signatures["java::java.lang.String.equals:(Ljava/lang/Object;)Z"]="SSZ"; - signatures - ["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= - "SSZ"; + signatures["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= + "SSZ"; + signatures["java::java.lang.StringBuilder.insert:(IZ)" + "Ljava/lang/StringBuilder;"]="SIZS"; + signatures["java::java.lang.StringBuilder.insert:(IJ)" + "Ljava/lang/StringBuilder;"]="SIJS"; + signatures["java::java.lang.StringBuilder.insert:(II)" + "Ljava/lang/StringBuilder;"]="SIIS"; + signatures["java::java.lang.StringBuilder.insert:(IC)" + "Ljava/lang/StringBuilder;"]="SICS"; + signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]="SISS"; + signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]="SISS"; + signatures["java::java.lang.StringBuilder.insert:(I[C)" + "Ljava/lang/StringBuilder;"]="SI[S"; + signatures["java::java.lang.String.intern:()Ljava/lang/String;"]="SV"; } /*******************************************************************\ diff --git a/src/goto-programs/string_refine_preprocess.h b/src/goto-programs/string_refine_preprocess.h index 613d0161926..5ae398b757a 100644 --- a/src/goto-programs/string_refine_preprocess.h +++ b/src/goto-programs/string_refine_preprocess.h @@ -14,6 +14,7 @@ Date: September 2016 #include #include +#include class string_refine_preprocesst:public messaget { @@ -29,28 +30,21 @@ class string_refine_preprocesst:public messaget typedef std::unordered_map id_mapt; typedef std::unordered_map expr_mapt; - // String builders maps the different names of a same StringBuilder object - // to a unique expression. - expr_mapt string_builders; - // Map name of Java string functions to there equivalent in the solver id_mapt side_effect_functions; id_mapt string_functions; id_mapt c_string_functions; id_mapt string_function_calls; - id_mapt string_of_char_array_functions; - id_mapt string_of_char_array_function_calls; - id_mapt side_effect_char_array_functions; std::unordered_map signatures; - expr_mapt hidden_strings; - expr_mapt java_to_cprover_strings; // unique id for each newly created symbols int next_symbol_id; void initialize_string_function_table(); + static bool check_java_type(const typet &type, const std::string &tag); + static bool is_java_string_pointer_type(const typet &type); static bool is_java_string_type(const typet &type); @@ -61,6 +55,20 @@ class string_refine_preprocesst:public messaget static bool is_java_char_sequence_type(const typet &type); + static bool is_java_char_sequence_pointer_type(const typet &type); + + static bool is_java_char_array_type(const typet &type); + + static bool is_java_char_array_pointer_type(const typet &type); + + static bool implements_java_char_sequence(const typet &type) + { + return + is_java_char_sequence_pointer_type(type) || + is_java_string_builder_pointer_type(type) || + is_java_string_pointer_type(type); + } + symbol_exprt fresh_array( const typet &type, const source_locationt &location); symbol_exprt fresh_string( @@ -110,6 +118,9 @@ class string_refine_preprocesst:public messaget void get_data_and_length_type_of_string( const exprt &expr, typet &data_type, typet &length_type); + void get_data_and_length_type_of_char_array( + const exprt &expr, typet &data_type, typet &length_type); + function_application_exprt build_function_application( const irep_idt &function_name, const typet &type, @@ -136,27 +147,16 @@ class string_refine_preprocesst:public messaget const source_locationt &location, const std::string &signature); - void make_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &lhs, - const code_typet &function_type, - const irep_idt &function_name, - const exprt::operandst &arg, - const source_locationt &loc, - const std::string &sig); - exprt make_cprover_string_assign( goto_programt &goto_program, goto_programt::targett &target, const exprt &rhs, const source_locationt &location); - void make_string_copy( + string_exprt make_cprover_char_array_assign( goto_programt &goto_program, goto_programt::targett &target, - const exprt &lhs, - const exprt &argument, + const exprt &rhs, const source_locationt &location); void make_string_function( @@ -192,33 +192,6 @@ class string_refine_preprocesst:public messaget void make_to_char_array_function( goto_programt &goto_program, goto_programt::targett &); - exprt make_cprover_char_array_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &rhs, - const source_locationt &location); - - void make_char_array_function( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature, - std::size_t index, - bool assign_first_arg=false, - bool skip_first_arg=false); - - void make_char_array_function_call( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature); - - void make_char_array_side_effect( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature); - void replace_string_calls(goto_functionst::function_mapt::iterator f_it); }; From b75d745dc62d078257e1cdc3c4a5a504a0e270e2 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 28 Feb 2017 13:50:25 +0000 Subject: [PATCH 09/15] Many corrections in string refinement Overhaul of string solver contributions The code adding lemmas to the solver for each equality has been completely remastered. It has been simplified and should now cover all foreseable cases. Add overflow constraints on sums for string solver When creating sum expressions during string refinement, e.g. when adding string lengths, we shall add axioms to prevent the solver from finding values that actually come from an integer overflow. Therefore, we introduce a new method plus_exprt_with_overflow_check() that wraps around plus_exprt and generates the relevant axioms. Cache function applications Cache converted function applications to avoid re-generating the axioms if it has already been done for a given function application. This seems to happen for function applications that are in an assertion, and thus are treated separately by convert_bitvector. Remove string_refinement::convert_symbol: the behaviour of this function was to convert java strings to strings of symbols. In hindsight, this was not a good idea, as we still refer to the actual java string fields in some cases, e.g. to read its length. Replace add_axioms_for_string_expr In all add_axioms_... methods, we replace all calls to add_axioms_for_string_expr by the newly created function get_string_expr, which simply returns a string_exprt when we give it an refined expr (such as a string_exprt or a symbol). Update doc in add_axioms_for_index_string Simplify constraint for is_prefix: apply distribution law and update (and refactor) documentation. Remove set_string_symbol_equal_to_expr() Removed mentions of java string type in the string solver Removing mentions of is_c_string_type To use the string solver in C, we should use a struct type with tag __CPROVER_refined_string_type Keep non-string axioms in a list Instead of giving axioms to super::boolbv_set_equality_to_true, which may contain un-substituted symbols, we keep them in a list and substitute them in dec_solve() before giving them to the function. Cleaning the add_axioms_for_string_expr method and renamed it to add_axioms_for_refined_string to signify that it should only be called on expression of type refined string. An assertion was added at the beginning to ensure that. Better get_array and string_of_array functions. Cleaned the implementation of get_array and factorized part of it. Made string_of_array directly take a array_exprt, which can be obtained from get_array. Improved string_of_array for non printable characters Resolve symbol to char arrays in non-string axioms. We introduce a mapping that expresses the maximal traversal of symbols of char array type. We use that map to, either obtain a char array from a symbol, either get a unique symbol the aliases equivalence class. This map is used to replace all symbols of char array type in the axioms that are added using the parent method: supert::boolbv_set_equality_to_true. Defining several intermediary functions for check_axioms and cleaning. Check axioms is quite complicated so we splitted it in several parts. Avoid using same universal variable name in string comparison function. Corrected test in index_of as characters could be signed Corrected return type retrieval in from_float functions. The return type should be given as argument of the helper function add_axioms_from_float. Fixed type of contains in constraint generation Introduce string_refinementt::set_to. We now override the set_to() method instead of set_equality_to_true(). This solves a problem where many lemmas were being dropped. Handle array_of in set_char_array_equality. Java assignments such as char[] str=new char[10] involves assigning the char array to 0, which is done using the array_of operator. This operator is now handled when found on the rhs of a char array assignment. Optimized string hash code and intern functions to only look at seen strings. We only compare the argument to the strings on which hash_code (resp. intern) was already called. Add missing override keyword in declarations Integrate strings into symbol resolution. We now use for strings the mechanism that was made for resolving symbols to char arrays. As a result, the symbol_to_string map has been removed. We introduce an unresolved_symbol map to associate symbols to string expressions that were introduced during constraint generation. Preventing return of nil exprt in sum_over_map Enforce -1 for witnesses when strings are unequal. This makes it easier to see when models for string have different length. Correct array index in get_array Correct a bad array access that caused a segmentation fault on the java_delete test. Adding option to concretize result and get method in string solver The option to concretize makes sure that the model that we get in the end is correct. Overiding the get method is necessary to get the actual valuation of string gotten by the solver. This two additions makes the trace generation for program with strings more likely to be actual traces of the program. Avoid adding axioms for copy Corrected `get` to not replace strings Simplifying lemmas before handing them to the solver This seems to improve performances Ignoring char array that are not symbol or constants Avoid creating new length variable for concat Adding constraint on positive length for refined strings Using get method of the parent class for the size Add function to fill the found_length map Signature for get_array should be constant Corrected overflow problem when integer get to the max size Removing overflow check in universal constraint Enforcing witness -1 when length is not sufficient in suffix Corrected index variable which should be universally quantified Factorizing addition to index set and more debug information Avoiding generating a fresh string for the empty string Enabling string solver to treat more cases and more debug infos Conversion from char pointer to array in the solver Raise a warning if found length is negative Ensure the arguments of parseInt are of the right format For now we add axioms asking for the argument to have the right format but ultimately we should raise an exception when it is not the case (which is not done right now). Corrects the specification of int to string conversion Some problems where encountered for strings with the maximal size possible for an int, which could cause an overflow. Disallow + sign in string from int Java will not add the sign for positive numbers Use get instead of current_model in check_axioms Streamline code of check_axioms() by calling get() insteand of relying on the 'current_model' variable. get() has been adapted to convert array-lists into with expressions, the former not being handled by the string solver. --- src/solvers/refinement/string_constraint.h | 1 - .../refinement/string_constraint_generator.h | 45 +- ...tring_constraint_generator_code_points.cpp | 18 +- ...string_constraint_generator_comparison.cpp | 102 +- .../string_constraint_generator_concat.cpp | 31 +- .../string_constraint_generator_constants.cpp | 9 +- .../string_constraint_generator_indexof.cpp | 27 +- .../string_constraint_generator_insert.cpp | 24 +- .../string_constraint_generator_main.cpp | 422 +++++--- .../string_constraint_generator_testing.cpp | 46 +- ...ng_constraint_generator_transformation.cpp | 39 +- .../string_constraint_generator_valueof.cpp | 101 +- src/solvers/refinement/string_refinement.cpp | 917 ++++++++++++++---- src/solvers/refinement/string_refinement.h | 66 +- 14 files changed, 1323 insertions(+), 525 deletions(-) diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h index 4b8d1229e09..1bb460349b7 100644 --- a/src/solvers/refinement/string_constraint.h +++ b/src/solvers/refinement/string_constraint.h @@ -48,7 +48,6 @@ class string_constraintt: public exprt return operands()[4]; } - private: string_constraintt(); diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index ed898cb9a18..12447e0cf04 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -14,6 +14,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H #include +#include #include #include @@ -65,21 +66,20 @@ class string_constraint_generatort symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type); symbol_exprt fresh_boolean(const irep_idt &prefix); string_exprt fresh_string(const refined_string_typet &type); + string_exprt get_string_expr(const exprt &expr); + string_exprt convert_java_string_to_string_exprt( + const exprt &underlying); + plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2); - // We maintain a map from symbols to strings. - std::map symbol_to_string; + // Maps unresolved symbols to the string_exprt that was created for them + std::map unresolved_symbols; - string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym); - void assign_to_symbol( - const symbol_exprt &sym, const string_exprt &expr) - { - symbol_to_string[sym.get_identifier()]=expr; - } + string_exprt find_or_add_string_of_symbol( + const symbol_exprt &sym, + const refined_string_typet &ref_type); - string_exprt add_axioms_for_string_expr(const exprt &expr); - void set_string_symbol_equal_to_expr( - const symbol_exprt &sym, const exprt &str); + string_exprt add_axioms_for_refined_string(const exprt &expr); exprt add_axioms_for_function_application( const function_application_exprt &expr); @@ -96,6 +96,8 @@ class string_constraint_generatort const std::size_t MAX_FLOAT_LENGTH=15; const std::size_t MAX_DOUBLE_LENGTH=30; + std::map function_application_cache; + static irep_idt extract_java_string(const symbol_exprt &s); exprt axiom_for_is_positive_index(const exprt &x); @@ -117,6 +119,9 @@ class string_constraint_generatort // The specification is partial: the actual value is not actually computed // but we ensure that hash codes of equal strings are equal. exprt add_axioms_for_hash_code(const function_application_exprt &f); + // To each string on which hash_code was called we associate a symbol + // representing the return value of the hash_code function. + std::map hash_code_of_string; exprt add_axioms_for_is_empty(const function_application_exprt &f); exprt add_axioms_for_is_prefix( @@ -224,7 +229,9 @@ class string_constraint_generatort // the start for negative number string_exprt add_axioms_from_float(const function_application_exprt &f); string_exprt add_axioms_from_float( - const exprt &f, bool double_precision=false); + const exprt &f, + const refined_string_typet &ref_type, + bool double_precision); // Add axioms corresponding to the String.valueOf(D) java function // TODO: the specifications is only partial @@ -253,6 +260,7 @@ class string_constraint_generatort string_exprt add_axioms_for_code_point( const exprt &code_point, const refined_string_typet &ref_type); string_exprt add_axioms_for_java_char_array(const exprt &char_array); + exprt add_axioms_for_char_pointer(const function_application_exprt &fun); string_exprt add_axioms_for_if(const if_exprt &expr); exprt add_axioms_for_char_literal(const function_application_exprt &f); @@ -270,6 +278,8 @@ class string_constraint_generatort const function_application_exprt &f); exprt add_axioms_for_parse_int(const function_application_exprt &f); + exprt add_axioms_for_correct_number_format( + const string_exprt &str, std::size_t max_size=10); exprt add_axioms_for_to_char_array(const function_application_exprt &f); exprt add_axioms_for_compare_to(const function_application_exprt &f); @@ -278,6 +288,9 @@ class string_constraint_generatort // string pointers symbol_exprt add_axioms_for_intern(const function_application_exprt &f); + // Pool used for the intern method + std::map intern_of_string; + // Tells which language is used. C and Java are supported irep_idt mode; @@ -293,14 +306,8 @@ class string_constraint_generatort exprt int_of_hex_char(const exprt &chr) const; exprt is_high_surrogate(const exprt &chr) const; exprt is_low_surrogate(const exprt &chr) const; - static exprt character_equals_ignore_case( + exprt character_equals_ignore_case( exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z); - - // Pool used for the intern method - std::map pool; - - // Used to determine whether hashcode should be equal - std::map hash; }; #endif diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp index 7c035a0d3e4..16763dfc97c 100644 --- a/src/solvers/refinement/string_constraint_generator_code_points.cpp +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -160,17 +160,18 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at( { typet return_type=f.type(); assert(return_type.id()==ID_signedbv); - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); const exprt &pos=args(f, 2)[1]; symbol_exprt result=fresh_symbol("char", return_type); exprt index1=from_integer(1, str.length().type()); const exprt &char1=str[pos]; - const exprt &char2=str[plus_exprt(pos, index1)]; + const exprt &char2=str[plus_exprt_with_overflow_check(pos, index1)]; exprt char1_as_int=typecast_exprt(char1, return_type); exprt char2_as_int=typecast_exprt(char2, return_type); exprt pair=pair_value(char1_as_int, char2_as_int, return_type); - exprt is_low=is_low_surrogate(str[plus_exprt(pos, index1)]); + exprt is_low=is_low_surrogate( + str[plus_exprt_with_overflow_check(pos, index1)]); exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low); axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); @@ -199,7 +200,7 @@ exprt string_constraint_generatort::add_axioms_for_code_point_before( typet return_type=f.type(); assert(return_type.id()==ID_signedbv); symbol_exprt result=fresh_symbol("char", return_type); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); const exprt &char1= str[minus_exprt(args[1], from_integer(2, str.length().type()))]; @@ -234,7 +235,7 @@ Function: string_constraint_generatort::add_axioms_for_code_point_count exprt string_constraint_generatort::add_axioms_for_code_point_count( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const exprt &begin=args(f, 3)[1]; const exprt &end=args(f, 3)[2]; const typet &return_type=f.type(); @@ -265,14 +266,15 @@ Function: string_constraint_generatort::add_axioms_for_offset_by_code_point exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const exprt &index=args(f, 3)[1]; const exprt &offset=args(f, 3)[2]; const typet &return_type=f.type(); symbol_exprt result=fresh_symbol("offset_by_code_point", return_type); - exprt minimum=plus_exprt(index, offset); - exprt maximum=plus_exprt(index, plus_exprt(offset, offset)); + exprt minimum=plus_exprt_with_overflow_check(index, offset); + exprt maximum=plus_exprt_with_overflow_check( + index, plus_exprt_with_overflow_check(offset, offset)); axioms.push_back(binary_relation_exprt(result, ID_le, maximum)); axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); diff --git a/src/solvers/refinement/string_constraint_generator_comparison.cpp b/src/solvers/refinement/string_constraint_generator_comparison.cpp index 321282ee9c1..a51e2abe3cd 100644 --- a/src/solvers/refinement/string_constraint_generator_comparison.cpp +++ b/src/solvers/refinement/string_constraint_generator_comparison.cpp @@ -18,7 +18,9 @@ Function: string_constraint_generatort::add_axioms_for_equals Outputs: a expression of Boolean type Purpose: add axioms stating that the result is true exactly when the strings - represented by the arguments are equal + represented by the arguments are equal. + the variable ending in `witness_unequal` is -1 if the length differs + or an index at which the strings are different \*******************************************************************/ @@ -29,8 +31,8 @@ exprt string_constraint_generatort::add_axioms_for_equals( symbol_exprt eq=fresh_boolean("equal"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); typet index_type=s1.length().type(); // We want to write: @@ -54,9 +56,10 @@ exprt string_constraint_generatort::add_axioms_for_equals( binary_relation_exprt(witness, ID_lt, s1.length()), binary_relation_exprt(witness, ID_ge, zero)); and_exprt witnessing(bound_witness, notequal_exprt(s1[witness], s2[witness])); - implies_exprt a3( - not_exprt(eq), - or_exprt(notequal_exprt(s1.length(), s2.length()), witnessing)); + and_exprt diff_length( + notequal_exprt(s1.length(), s2.length()), + equal_exprt(witness, from_integer(-1, index_type))); + implies_exprt a3(not_exprt(eq), or_exprt(diff_length, witnessing)); axioms.push_back(a3); return tc_eq; @@ -92,8 +95,13 @@ exprt string_constraint_generatort::character_equals_ignore_case( // p3 : (is_up2&&'a'-'A'+char2=char1) equal_exprt p1(char1, char2); minus_exprt diff=minus_exprt(char_a, char_A); - and_exprt p2(is_upper_case_1, equal_exprt(plus_exprt(diff, char1), char2)); - and_exprt p3(is_upper_case_2, equal_exprt(plus_exprt(diff, char2), char1)); + + // Overflow is not a problem here because is_upper_case conditions + // ensure that we are within a safe range. + and_exprt p2(is_upper_case_1, + equal_exprt(plus_exprt(diff, char1), char2)); + and_exprt p3(is_upper_case_2, + equal_exprt(plus_exprt(diff, char2), char1)); return or_exprt(or_exprt(p1, p2), p3); } @@ -116,8 +124,8 @@ exprt string_constraint_generatort::add_axioms_for_equals_ignore_case( symbol_exprt eq=fresh_boolean("equal_ignore_case"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); typet char_type=to_refined_string_type(s1.type()).get_char_type(); exprt char_a=constant_char('a', char_type); exprt char_A=constant_char('A', char_type); @@ -174,37 +182,35 @@ Function: string_constraint_generatort::add_axioms_for_hash_code exprt string_constraint_generatort::add_axioms_for_hash_code( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); typet return_type=f.type(); typet index_type=str.length().type(); - // initialisation of the missing pool variable - std::map::iterator it; - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(hash.find(it->second)==hash.end()) - hash[it->second]=fresh_symbol("hash", return_type); + auto pair=hash_code_of_string.insert( + std::make_pair(str, fresh_symbol("hash", return_type))); + exprt hash=pair.first->second; // for each string s. either: // c1: hash(str)=hash(s) // c2: |str|!=|s| - // c3: (|str|==|s| &&exists i<|s|. s[i]!=str[i]) + // c3: (|str|==|s| && exists i<|s|. s[i]!=str[i]) // WARNING: the specification may be incomplete - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + for(auto it : hash_code_of_string) { symbol_exprt i=fresh_exist_index("index_hash", index_type); - equal_exprt c1(hash[it->second], hash[str]); - not_exprt c2(equal_exprt(it->second.length(), str.length())); + equal_exprt c1(it.second, hash); + not_exprt c2(equal_exprt(it.first.length(), str.length())); and_exprt c3( - equal_exprt(it->second.length(), str.length()), + equal_exprt(it.first.length(), str.length()), and_exprt( - not_exprt(equal_exprt(str[i], it->second[i])), + not_exprt(equal_exprt(str[i], it.first[i])), and_exprt( str.axiom_for_is_strictly_longer_than(i), axiom_for_is_positive_index(i)))); axioms.push_back(or_exprt(c1, or_exprt(c2, c3))); } - return hash[str]; + return hash; } /*******************************************************************\ @@ -222,8 +228,8 @@ Function: string_constraint_generatort::add_axioms_for_compare_to exprt string_constraint_generatort::add_axioms_for_compare_to( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); const typet &return_type=f.type(); symbol_exprt res=fresh_symbol("compare_to", return_type); typet index_type=s1.length().type(); @@ -234,12 +240,12 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( // a1 : res==0 => |s1|=|s2| // a2 : forall i<|s1|. s1[i]==s2[i] // a3 : exists x. - // res!=0 ==> x> 0 && - // ((|s1| <= |s2| &&x<|s1|) || (|s1| >= |s2| &&x<|s2|) - // &&res=s1[x]-s2[x] ) - // || cond2: - // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) - // a4 : forall i s1[i]=s2[i] + // res!=0 ==> x > 0 + // && ((|s1| <= |s2| && x<|s1|) || (|s1| >= |s2| &&x<|s2|) + // && res=s1[x]-s2[x] ) + // || cond2: + // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) + // a4 : forall i' s1[i]=s2[i] assert(return_type.id()==ID_signedbv); @@ -282,7 +288,9 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( or_exprt(cond1, cond2))); axioms.push_back(a3); - string_constraintt a4(i, x, not_exprt(res_null), equal_exprt(s1[i], s2[i])); + symbol_exprt i2=fresh_univ_index("QA_compare_to", index_type); + string_constraintt a4( + i2, x, not_exprt(res_null), equal_exprt(s1[i2], s2[i2])); axioms.push_back(a4); return res; @@ -304,15 +312,15 @@ Function: string_constraint_generatort::add_axioms_for_intern symbol_exprt string_constraint_generatort::add_axioms_for_intern( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); + // For now we only enforce content equality and not pointer equality const typet &return_type=f.type(); + typet index_type=str.length().type(); - // initialisation of the missing pool variable - std::map::iterator it; - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(pool.find(it->second)==pool.end()) - pool[it->second]=fresh_symbol("pool", return_type); + auto pair=intern_of_string.insert( + std::make_pair(str, fresh_symbol("pool", return_type))); + symbol_exprt intern=pair.first->second; // intern(str)=s_0 || s_1 || ... // for each string s. @@ -320,30 +328,30 @@ symbol_exprt string_constraint_generatort::add_axioms_for_intern( // || (|str|==|s| &&exists i<|s|. s[i]!=str[i]) exprt disj=false_exprt(); - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + for(auto it : intern_of_string) disj=or_exprt( - disj, equal_exprt(pool[str], symbol_exprt(it->first, return_type))); + disj, equal_exprt(intern, it.second)); axioms.push_back(disj); // WARNING: the specification may be incomplete or incorrect - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(it->second!=str) + for(auto it : intern_of_string) + if(it.second!=str) { symbol_exprt i=fresh_exist_index("index_intern", index_type); axioms.push_back( or_exprt( - equal_exprt(pool[it->second], pool[str]), + equal_exprt(it.second, intern), or_exprt( - not_exprt(str.axiom_for_has_same_length_as(it->second)), + not_exprt(str.axiom_for_has_same_length_as(it.first)), and_exprt( - str.axiom_for_has_same_length_as(it->second), + str.axiom_for_has_same_length_as(it.first), and_exprt( - not_exprt(equal_exprt(str[i], it->second[i])), + not_exprt(equal_exprt(str[i], it.first[i])), and_exprt(str.axiom_for_is_strictly_longer_than(i), axiom_for_is_positive_index(i))))))); } - return pool[str]; + return intern; } diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index a7d9c4921c4..fb8e24d5320 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -35,9 +35,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( // a4 : forall i<|s1|. res[i]=s1[i] // a5 : forall i<|s2|. res[i+|s1|]=s2[i] - exprt a1=res.axiom_for_has_length( - plus_exprt(s1.length(), s2.length())); - axioms.push_back(a1); + res.length()=plus_exprt_with_overflow_check(s1.length(), s2.length()); axioms.push_back(s1.axiom_for_is_shorter_than(res)); axioms.push_back(s2.axiom_for_is_shorter_than(res)); @@ -73,8 +71,8 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( const function_application_exprt::argumentst &args=f.arguments(); assert(args.size()==2); - string_exprt s1=add_axioms_for_string_expr(args[0]); - string_exprt s2=add_axioms_for_string_expr(args[1]); + string_exprt s1=get_string_expr(args[0]); + string_exprt s2=get_string_expr(args[1]); return add_axioms_for_concat(s1, s2); } @@ -95,7 +93,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_int( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_from_int( args(f, 2)[1], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); @@ -118,7 +116,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_long( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_LONG_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); } @@ -138,7 +136,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_bool string_exprt string_constraint_generatort::add_axioms_for_concat_bool( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_bool(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); @@ -159,7 +157,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_char string_exprt string_constraint_generatort::add_axioms_for_concat_char( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_char(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); @@ -180,9 +178,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_double string_exprt string_constraint_generatort::add_axioms_for_concat_double( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_float( - args(f, 2)[1], MAX_DOUBLE_LENGTH); + string_exprt s1=get_string_expr(args(f, 2)[0]); + assert(refined_string_typet::is_refined_string_type(f.type())); + refined_string_typet ref_type=to_refined_string_type(f.type()); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], ref_type, true); return add_axioms_for_concat(s1, s2); } @@ -201,8 +200,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_float string_exprt string_constraint_generatort::add_axioms_for_concat_float( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_float(args(f, 2)[1], MAX_FLOAT_LENGTH); + string_exprt s1=get_string_expr(args(f, 2)[0]); + assert(refined_string_typet::is_refined_string_type(f.type())); + refined_string_typet ref_type=to_refined_string_type(f.type()); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], ref_type, false); return add_axioms_for_concat(s1, s2); } @@ -222,7 +223,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_code_point string_exprt string_constraint_generatort::add_axioms_for_concat_code_point( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_for_code_point(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index 64ffabfd266..2905c6a5f21 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -93,9 +93,13 @@ string_exprt string_constraint_generatort::add_axioms_for_empty_string( const function_application_exprt &f) { assert(f.arguments().empty()); + assert(refined_string_typet::is_refined_string_type(f.type())); const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt res=fresh_string(ref_type); - axioms.push_back(res.axiom_for_has_length(0)); + exprt size=from_integer(0, ref_type.get_index_type()); + const array_typet &content_type=ref_type.get_content_type(); + array_of_exprt empty_array( + from_integer(0, ref_type.get_content_type().subtype()), content_type); + string_exprt res(size, empty_array, ref_type); return res; } @@ -132,6 +136,7 @@ string_exprt string_constraint_generatort::add_axioms_from_literal( else { // Java string constant + assert(false); // TODO: Check if used. On the contrary, discard else. assert(arg.id()==ID_symbol); const exprt &s=arg.op0(); diff --git a/src/solvers/refinement/string_constraint_generator_indexof.cpp b/src/solvers/refinement/string_constraint_generator_indexof.cpp index 283de683bd6..d799091a573 100644 --- a/src/solvers/refinement/string_constraint_generator_indexof.cpp +++ b/src/solvers/refinement/string_constraint_generator_indexof.cpp @@ -92,15 +92,16 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( symbol_exprt contains=fresh_boolean("contains_substring"); // We add axioms: - // a1 : contains => |substring|>=offset>=from_index + // a1 : contains => |str|-|substring|>=offset>=from_index // a2 : !contains => offset=-1 - // a3 : forall 0 <= witness str[witness+offset]=substring[witness] + // a3 : forall 0<=witness<|substring|. + // contains => str[witness+offset]=substring[witness] implies_exprt a1( contains, and_exprt( - str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( + substring.length(), offset)), binary_relation_exprt(offset, ID_ge, from_index))); axioms.push_back(a1); @@ -138,7 +139,8 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( implies_exprt a1( contains, and_exprt( - str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + str.axiom_for_is_longer_than( + plus_exprt_with_overflow_check(substring.length(), offset)), binary_relation_exprt(offset, ID_le, from_index))); axioms.push_back(a1); @@ -173,7 +175,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); const exprt &c=args[1]; const refined_string_typet &ref_type=to_refined_string_type(str.type()); assert(f.type()==ref_type.get_index_type()); @@ -186,14 +188,15 @@ exprt string_constraint_generatort::add_axioms_for_index_of( else assert(false); - if(c.type().id()==ID_unsignedbv) + if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { return add_axioms_for_index_of( str, typecast_exprt(c, ref_type.get_char_type()), from_index); } else { - string_exprt sub=add_axioms_for_string_expr(c); + assert(refined_string_typet::is_refined_string_type(c.type())); + string_exprt sub=get_string_expr(c); return add_axioms_for_index_of_string(str, sub, from_index); } } @@ -215,7 +218,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( exprt index1=from_integer(1, index_type); exprt minus1=from_integer(-1, index_type); - exprt from_index_plus_one=plus_exprt(from_index, index1); + exprt from_index_plus_one=plus_exprt_with_overflow_check(from_index, index1); and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), binary_relation_exprt(index, ID_lt, from_index_plus_one)); @@ -269,7 +272,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); exprt c=args[1]; const refined_string_typet &ref_type=to_refined_string_type(str.type()); exprt from_index; @@ -282,14 +285,14 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( else assert(false); - if(c.type().id()==ID_unsignedbv) + if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { return add_axioms_for_last_index_of( str, typecast_exprt(c, ref_type.get_char_type()), from_index); } else { - string_exprt sub=add_axioms_for_string_expr(c); + string_exprt sub=get_string_expr(c); return add_axioms_for_last_index_of_string(str, sub, from_index); } } diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp index 6a080a5dc6c..cd5f4376a93 100644 --- a/src/solvers/refinement/string_constraint_generator_insert.cpp +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -48,8 +48,8 @@ Function: string_constraint_generatort::add_axioms_for_insert string_exprt string_constraint_generatort::add_axioms_for_insert( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + string_exprt s2=get_string_expr(args(f, 3)[2]); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -70,7 +70,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_int( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_int( args(f, 3)[2], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); @@ -93,7 +93,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_long( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_LONG_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -115,7 +115,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_bool( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_bool(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -136,7 +136,7 @@ Function: string_constraint_generatort::add_axioms_for_insert_char string_exprt string_constraint_generatort::add_axioms_for_insert_char( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_char(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); @@ -158,8 +158,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_double string_exprt string_constraint_generatort::add_axioms_for_insert_double( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_float(args(f, 3)[2], ref_type, true); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -179,8 +180,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_float string_exprt string_constraint_generatort::add_axioms_for_insert_float( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_float(args(f, 3)[2], ref_type, false); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -216,7 +218,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_char_array( offset=from_integer(0, count.type()); } - string_exprt str=add_axioms_for_string_expr(f.arguments()[0]); + string_exprt str=get_string_expr(f.arguments()[0]); const exprt &length=f.arguments()[2]; const exprt &data=f.arguments()[3]; string_exprt arr=add_axioms_from_char_array( diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 7fe510c2e35..f8dedaab9a7 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -16,6 +16,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include #include +#include unsigned string_constraint_generatort::next_symbol_id=1; @@ -122,6 +123,42 @@ symbol_exprt string_constraint_generatort::fresh_boolean( /*******************************************************************\ +Function: string_constraint_generatort::plus_exprt_with_overflow_check + + Inputs: + op1 - First term of the sum + op2 - Second term of the sum + + Outputs: A plus expression representing the sum of the arguments + + Purpose: Create a plus expression while adding extra constraints to + axioms in order to prevent overflows. + +\*******************************************************************/ +plus_exprt string_constraint_generatort::plus_exprt_with_overflow_check( + const exprt &op1, const exprt &op2) +{ + plus_exprt sum(plus_exprt(op1, op2)); + + exprt zero=from_integer(0, op1.type()); + + binary_relation_exprt neg1(op1, ID_lt, zero); + binary_relation_exprt neg2(op2, ID_lt, zero); + binary_relation_exprt neg_sum(sum, ID_lt, zero); + + // We prevent overflows by adding the following constraint: + // If the signs of the two operands are the same, then the sign of the sum + // should also be the same. + implies_exprt no_overflow(equal_exprt(neg1, neg2), + equal_exprt(neg1, neg_sum)); + + axioms.push_back(no_overflow); + + return sum; +} + +/*******************************************************************\ + Function: string_constraint_generatort::fresh_string Inputs: a type for string @@ -144,59 +181,113 @@ string_exprt string_constraint_generatort::fresh_string( /*******************************************************************\ -Function: string_constraint_generatort::add_axioms_for_string_expr +Function: string_constraint_generatort::get_string_expr + + Inputs: an expression of refined string type + + Outputs: a string expression + + Purpose: casts an expression to a string expression, or fetches the + actual string_exprt in the case of a symbol. + +\*******************************************************************/ + +string_exprt string_constraint_generatort::get_string_expr(const exprt &expr) +{ + assert(refined_string_typet::is_refined_string_type(expr.type())); + + if(expr.id()==ID_symbol) + { + return find_or_add_string_of_symbol( + to_symbol_expr(expr), + to_refined_string_type(expr.type())); + } + else + { + return to_string_expr(expr); + } +} + +string_exprt string_constraint_generatort::convert_java_string_to_string_exprt( + const exprt &jls) +{ + assert(get_mode()==ID_java); + assert(jls.id()==ID_struct); + + exprt length(to_struct_expr(jls).op1()); + // TODO: Add assertion on the type. + // assert(length.type()==refined_string_typet::index_type()); + exprt java_content(to_struct_expr(jls).op2()); + if(java_content.id()==ID_address_of) + { + java_content=to_address_of_expr(java_content).object(); + } + else + { + java_content=dereference_exprt(java_content, java_content.type()); + } + + refined_string_typet type(java_int_type(), java_char_type()); + + return string_exprt(length, java_content, type); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_refined_string - Inputs: an expression of type string + Inputs: an expression of refined string type Outputs: a string expression that is linked to the argument through axioms that are added to the list - Purpose: obtain a refined string expression corresponding to string - variable of string function call + Purpose: obtain a refined string expression corresponding to a expression + of type string \*******************************************************************/ -string_exprt string_constraint_generatort::add_axioms_for_string_expr( - const exprt &unrefined_string) + +string_exprt string_constraint_generatort::add_axioms_for_refined_string( + const exprt &string) { - string_exprt s; + assert(refined_string_typet::is_refined_string_type(string.type())); + refined_string_typet type=to_refined_string_type(string.type()); - if(unrefined_string.id()==ID_function_application) + // Function applications should have been removed before + assert(string.id()!=ID_function_application); + + if(string.id()==ID_symbol) { - exprt res=add_axioms_for_function_application( - to_function_application_expr(unrefined_string)); - s=to_string_expr(res); + const symbol_exprt &sym=to_symbol_expr(string); + string_exprt s=find_or_add_string_of_symbol(sym, type); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } - else if(unrefined_string.id()==ID_symbol) - s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string)); - else if(unrefined_string.id()==ID_address_of) + else if(string.id()==ID_nondet_symbol) { - assert(unrefined_string.op0().id()==ID_symbol); - s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string.op0())); + string_exprt s=fresh_string(type); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } - else if(unrefined_string.id()==ID_if) - s=add_axioms_for_if(to_if_expr(unrefined_string)); - else if(unrefined_string.id()==ID_nondet_symbol || - unrefined_string.id()==ID_struct) + else if(string.id()==ID_if) { - // TODO: for now we ignore non deterministic symbols and struct + return add_axioms_for_if(to_if_expr(string)); } - else if(unrefined_string.id()==ID_typecast) + else if(string.id()==ID_struct) { - exprt arg=to_typecast_expr(unrefined_string).op(); - exprt res=add_axioms_for_string_expr(arg); - s=to_string_expr(res); + const string_exprt &s=to_string_expr(string); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } else { - throw "add_axioms_for_string_expr:\n"+unrefined_string.pretty()+ + throw "add_axioms_for_refined_string:\n"+string.pretty()+ "\nwhich is not a function application, "+ - "a symbol or an if expression"; + "a symbol, a struct or an if expression"; } - - axioms.push_back( - s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); - return s; } /*******************************************************************\ @@ -216,10 +307,10 @@ string_exprt string_constraint_generatort::add_axioms_for_if( { assert( refined_string_typet::is_refined_string_type(expr.true_case().type())); - string_exprt t=add_axioms_for_string_expr(expr.true_case()); + string_exprt t=get_string_expr(expr.true_case()); assert( refined_string_typet::is_refined_string_type(expr.false_case().type())); - string_exprt f=add_axioms_for_string_expr(expr.false_case()); + string_exprt f=get_string_expr(expr.false_case()); const refined_string_typet &ref_type=to_refined_string_type(t.type()); const typet &index_type=ref_type.get_index_type(); string_exprt res=fresh_string(ref_type); @@ -246,7 +337,7 @@ Function: string_constraint_generatort::find_or_add_string_of_symbol Outputs: a string expression - Purpose: if a symbol represent a string is present in the symbol_to_string + Purpose: if a symbol representing a string is present in the symbol_to_string table, returns the corresponding string, if the symbol is not yet present, creates a new string with the correct type depending on whether the mode is java or c, adds it to the table and returns it. @@ -254,12 +345,11 @@ Function: string_constraint_generatort::find_or_add_string_of_symbol \*******************************************************************/ string_exprt string_constraint_generatort::find_or_add_string_of_symbol( - const symbol_exprt &sym) + const symbol_exprt &sym, const refined_string_typet &ref_type) { irep_idt id=sym.get_identifier(); - const refined_string_typet &ref_type=to_refined_string_type(sym.type()); string_exprt str=fresh_string(ref_type); - auto entry=symbol_to_string.insert(std::make_pair(id, str)); + auto entry=unresolved_symbols.insert(std::make_pair(id, str)); return entry.first->second; } @@ -286,125 +376,180 @@ exprt string_constraint_generatort::add_axioms_for_function_application( const irep_idt &id=is_ssa_expr(name)?to_ssa_expr(name).get_object_name(): to_symbol_expr(name).get_identifier(); + std::string str_id(id.c_str()); + + size_t pos=str_id.find("func_length"); + if(pos!=std::string::npos) + { + function_application_exprt new_expr(expr); + // TODO: This part needs some improvement. + // Stripping the symbol name is not a very robust process. + new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); + assert(get_mode()==ID_java); + new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); + + auto res_it=function_application_cache.insert(std::make_pair(new_expr, + nil_exprt())); + if(res_it.second) + { + string_exprt res=to_string_expr( + add_axioms_for_function_application(new_expr)); + res_it.first->second=res; + return res.length(); + } + else + return to_string_expr(res_it.first->second).length(); + } + + pos = str_id.find("func_data"); + if(pos!=std::string::npos) + { + function_application_exprt new_expr(expr); + new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); + new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); + + auto res_it=function_application_cache.insert(std::make_pair(new_expr, + nil_exprt())); + if(res_it.second) + { + string_exprt res=to_string_expr( + add_axioms_for_function_application(new_expr)); + res_it.first->second=res; + return res.content(); + } + else + return to_string_expr(res_it.first->second).content(); + } + // TODO: improve efficiency of this test by either ordering test by frequency // or using a map + auto res_it=function_application_cache.find(expr); + if(res_it!=function_application_cache.end() && res_it->second!=nil_exprt()) + return res_it->second; + + exprt res; + if(id==ID_cprover_char_literal_func) - return add_axioms_for_char_literal(expr); + res=add_axioms_for_char_literal(expr); else if(id==ID_cprover_string_length_func) - return add_axioms_for_length(expr); + res=add_axioms_for_length(expr); else if(id==ID_cprover_string_equal_func) - return add_axioms_for_equals(expr); + res=add_axioms_for_equals(expr); else if(id==ID_cprover_string_equals_ignore_case_func) - return add_axioms_for_equals_ignore_case(expr); + res=add_axioms_for_equals_ignore_case(expr); else if(id==ID_cprover_string_is_empty_func) - return add_axioms_for_is_empty(expr); + res=add_axioms_for_is_empty(expr); else if(id==ID_cprover_string_char_at_func) - return add_axioms_for_char_at(expr); + res=add_axioms_for_char_at(expr); else if(id==ID_cprover_string_is_prefix_func) - return add_axioms_for_is_prefix(expr); + res=add_axioms_for_is_prefix(expr); else if(id==ID_cprover_string_is_suffix_func) - return add_axioms_for_is_suffix(expr); + res=add_axioms_for_is_suffix(expr); else if(id==ID_cprover_string_startswith_func) - return add_axioms_for_is_prefix(expr, true); + res=add_axioms_for_is_prefix(expr, true); else if(id==ID_cprover_string_endswith_func) - return add_axioms_for_is_suffix(expr, true); + res=add_axioms_for_is_suffix(expr, true); else if(id==ID_cprover_string_contains_func) - return add_axioms_for_contains(expr); + res=add_axioms_for_contains(expr); else if(id==ID_cprover_string_hash_code_func) - return add_axioms_for_hash_code(expr); + res=add_axioms_for_hash_code(expr); else if(id==ID_cprover_string_index_of_func) - return add_axioms_for_index_of(expr); + res=add_axioms_for_index_of(expr); else if(id==ID_cprover_string_last_index_of_func) - return add_axioms_for_last_index_of(expr); + res=add_axioms_for_last_index_of(expr); else if(id==ID_cprover_string_parse_int_func) - return add_axioms_for_parse_int(expr); + res=add_axioms_for_parse_int(expr); else if(id==ID_cprover_string_to_char_array_func) - return add_axioms_for_to_char_array(expr); + res=add_axioms_for_to_char_array(expr); else if(id==ID_cprover_string_code_point_at_func) - return add_axioms_for_code_point_at(expr); + res=add_axioms_for_code_point_at(expr); else if(id==ID_cprover_string_code_point_before_func) - return add_axioms_for_code_point_before(expr); + res=add_axioms_for_code_point_before(expr); else if(id==ID_cprover_string_code_point_count_func) - return add_axioms_for_code_point_count(expr); + res=add_axioms_for_code_point_count(expr); else if(id==ID_cprover_string_offset_by_code_point_func) - return add_axioms_for_offset_by_code_point(expr); + res=add_axioms_for_offset_by_code_point(expr); else if(id==ID_cprover_string_compare_to_func) - return add_axioms_for_compare_to(expr); + res=add_axioms_for_compare_to(expr); else if(id==ID_cprover_string_literal_func) - return add_axioms_from_literal(expr); + res=add_axioms_from_literal(expr); else if(id==ID_cprover_string_concat_func) - return add_axioms_for_concat(expr); + res=add_axioms_for_concat(expr); else if(id==ID_cprover_string_concat_int_func) - return add_axioms_for_concat_int(expr); + res=add_axioms_for_concat_int(expr); else if(id==ID_cprover_string_concat_long_func) - return add_axioms_for_concat_long(expr); + res=add_axioms_for_concat_long(expr); else if(id==ID_cprover_string_concat_bool_func) - return add_axioms_for_concat_bool(expr); + res=add_axioms_for_concat_bool(expr); else if(id==ID_cprover_string_concat_char_func) - return add_axioms_for_concat_char(expr); + res=add_axioms_for_concat_char(expr); else if(id==ID_cprover_string_concat_double_func) - return add_axioms_for_concat_double(expr); + res=add_axioms_for_concat_double(expr); else if(id==ID_cprover_string_concat_float_func) - return add_axioms_for_concat_float(expr); + res=add_axioms_for_concat_float(expr); else if(id==ID_cprover_string_concat_code_point_func) - return add_axioms_for_concat_code_point(expr); + res=add_axioms_for_concat_code_point(expr); else if(id==ID_cprover_string_insert_func) - return add_axioms_for_insert(expr); + res=add_axioms_for_insert(expr); else if(id==ID_cprover_string_insert_int_func) - return add_axioms_for_insert_int(expr); + res=add_axioms_for_insert_int(expr); else if(id==ID_cprover_string_insert_long_func) - return add_axioms_for_insert_long(expr); + res=add_axioms_for_insert_long(expr); else if(id==ID_cprover_string_insert_bool_func) - return add_axioms_for_insert_bool(expr); + res=add_axioms_for_insert_bool(expr); else if(id==ID_cprover_string_insert_char_func) - return add_axioms_for_insert_char(expr); + res=add_axioms_for_insert_char(expr); else if(id==ID_cprover_string_insert_double_func) - return add_axioms_for_insert_double(expr); + res=add_axioms_for_insert_double(expr); else if(id==ID_cprover_string_insert_float_func) - return add_axioms_for_insert_float(expr); + res=add_axioms_for_insert_float(expr); +#if 0 else if(id==ID_cprover_string_insert_char_array_func) - return add_axioms_for_insert_char_array(expr); + res=add_axioms_for_insert_char_array(expr); +#endif else if(id==ID_cprover_string_substring_func) - return add_axioms_for_substring(expr); + res=add_axioms_for_substring(expr); else if(id==ID_cprover_string_trim_func) - return add_axioms_for_trim(expr); + res=add_axioms_for_trim(expr); else if(id==ID_cprover_string_to_lower_case_func) - return add_axioms_for_to_lower_case(expr); + res=add_axioms_for_to_lower_case(expr); else if(id==ID_cprover_string_to_upper_case_func) - return add_axioms_for_to_upper_case(expr); + res=add_axioms_for_to_upper_case(expr); else if(id==ID_cprover_string_char_set_func) - return add_axioms_for_char_set(expr); + res=add_axioms_for_char_set(expr); else if(id==ID_cprover_string_value_of_func) - return add_axioms_for_value_of(expr); + res=add_axioms_for_value_of(expr); else if(id==ID_cprover_string_empty_string_func) - return add_axioms_for_empty_string(expr); + res=add_axioms_for_empty_string(expr); else if(id==ID_cprover_string_copy_func) - return add_axioms_for_copy(expr); + res=add_axioms_for_copy(expr); else if(id==ID_cprover_string_of_int_func) - return add_axioms_from_int(expr); + res=add_axioms_from_int(expr); else if(id==ID_cprover_string_of_int_hex_func) - return add_axioms_from_int_hex(expr); + res=add_axioms_from_int_hex(expr); else if(id==ID_cprover_string_of_float_func) - return add_axioms_from_float(expr); + res=add_axioms_from_float(expr); else if(id==ID_cprover_string_of_double_func) - return add_axioms_from_double(expr); + res=add_axioms_from_double(expr); else if(id==ID_cprover_string_of_long_func) - return add_axioms_from_long(expr); + res=add_axioms_from_long(expr); else if(id==ID_cprover_string_of_bool_func) - return add_axioms_from_bool(expr); + res=add_axioms_from_bool(expr); else if(id==ID_cprover_string_of_char_func) - return add_axioms_from_char(expr); - else if(id==ID_cprover_string_of_char_array_func) - return add_axioms_from_char_array(expr); + res=add_axioms_from_char(expr); else if(id==ID_cprover_string_set_length_func) - return add_axioms_for_set_length(expr); + res=add_axioms_for_set_length(expr); else if(id==ID_cprover_string_delete_func) - return add_axioms_for_delete(expr); + res=add_axioms_for_delete(expr); else if(id==ID_cprover_string_delete_char_at_func) - return add_axioms_for_delete_char_at(expr); + res=add_axioms_for_delete_char_at(expr); else if(id==ID_cprover_string_replace_func) - return add_axioms_for_replace(expr); + res=add_axioms_for_replace(expr); + else if(id==ID_cprover_string_intern_func) + res=add_axioms_for_intern(expr); + else if(id==ID_cprover_string_array_of_char_pointer_func) + res=add_axioms_for_char_pointer(expr); else { std::string msg( @@ -412,13 +557,16 @@ exprt string_constraint_generatort::add_axioms_for_function_application( msg+=id2string(id); throw msg; } + function_application_cache[expr]=res; + return res; } /*******************************************************************\ Function: string_constraint_generatort::add_axioms_for_copy - Inputs: function application with one argument, which is a string + Inputs: function application with one argument, which is a string, + or three arguments: string, integer offset and count Outputs: a new string expression @@ -430,20 +578,20 @@ Function: string_constraint_generatort::add_axioms_for_copy string_exprt string_constraint_generatort::add_axioms_for_copy( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 1)[0]); - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt res=fresh_string(ref_type); - - // We add axioms: - // a1 : |res|=|s1| - // a2 : forall i<|s1|. s1[i]=res[i] - - axioms.push_back(res.axiom_for_has_same_length_as(s1)); - - symbol_exprt idx=fresh_univ_index("QA_index_copy", ref_type.get_index_type()); - string_constraintt a2(idx, s1.length(), equal_exprt(s1[idx], res[idx])); - axioms.push_back(a2); - return res; + const auto &args=f.arguments(); + if(args.size()==1) + { + string_exprt s1=get_string_expr(args[0]); + return s1; + } + else + { + assert(args.size()==3); + string_exprt s1=get_string_expr(args[0]); + exprt offset=args[1]; + exprt count=args[2]; + return add_axioms_for_substring(s1, offset, plus_exprt(offset, count)); + } } /*******************************************************************\ @@ -473,6 +621,28 @@ string_exprt string_constraint_generatort::add_axioms_for_java_char_array( /*******************************************************************\ +Function: string_constraint_generatort::add_axioms_for_char_pointer + + Inputs: an expression of type char + + Outputs: an array expression + + Purpose: for an expression of the form `array[0]` returns `array` + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_char_pointer( + const function_application_exprt &fun) +{ + exprt char_pointer=args(fun, 1)[0]; + if(char_pointer.id()==ID_index) + return char_pointer.op0(); + // TODO: we do not know what to do in the other cases + assert(false); +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_length Inputs: function application with one string argument @@ -486,7 +656,7 @@ Function: string_constraint_generatort::add_axioms_for_length exprt string_constraint_generatort::add_axioms_for_length( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); return str.length(); } @@ -511,6 +681,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( const exprt &offset, const exprt &count) { + assert(false); // deprecated, we should use add_axioms_for_substring instead const typet &char_type=to_array_type(data.type()).subtype(); const typet &index_type=length.type(); refined_string_typet ref_type(index_type, char_type); @@ -523,7 +694,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array", index_type); exprt char_in_tab=data; assert(char_in_tab.id()==ID_index); - char_in_tab.op1()=plus_exprt(qvar, offset); + char_in_tab.op1()=plus_exprt_with_overflow_check(qvar, offset); string_constraintt a1(qvar, count, equal_exprt(str[qvar], char_in_tab)); axioms.push_back(a1); @@ -549,6 +720,7 @@ Function: string_constraint_generatort::add_axioms_from_char_array string_exprt string_constraint_generatort::add_axioms_from_char_array( const function_application_exprt &f) { + assert(false); // deprecated, we should use add_axioms_for_substring instead exprt offset; exprt count; if(f.arguments().size()==4) @@ -638,7 +810,7 @@ Function: string_constraint_generatort::add_axioms_for_char_at exprt string_constraint_generatort::add_axioms_for_char_at( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); symbol_exprt char_sym=fresh_symbol("char", ref_type.get_char_type()); axioms.push_back(equal_exprt(char_sym, str[args(f, 2)[1]])); @@ -660,26 +832,6 @@ Function: string_constraint_generatort::add_axioms_for_to_char_array exprt string_constraint_generatort::add_axioms_for_to_char_array( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); return str.content(); } - -/*******************************************************************\ - -Function: string_constraint_generatort::set_string_symbol_equal_to_expr - - Inputs: a symbol and a string - - Purpose: add a correspondence to make sure the symbol points to the - same string as the second argument - -\*******************************************************************/ - -void string_constraint_generatort::set_string_symbol_equal_to_expr( - const symbol_exprt &sym, const exprt &str) -{ - if(str.id()==ID_symbol) - assign_to_symbol(sym, find_or_add_string_of_symbol(to_symbol_expr(str))); - else - assign_to_symbol(sym, add_axioms_for_string_expr(str)); -} diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index 8b1c7bca9d4..2a08b021070 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -31,15 +31,15 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( // We add axioms: // a1 : isprefix => |str| >= |prefix|+offset - // a2 : forall 0<=qvar - // s0[witness+offset]=s2[witness] - // a3 : !isprefix => |str| < |prefix|+offset - // || (|str| >= |prefix|+offset &&0<=witness<|prefix| - // &&str[witness+ofsset]!=prefix[witness]) + // a2 : forall 0<=qvar<|prefix|. isprefix => s0[witness+offset]=s2[witness] + // a3 : !isprefix => + // |str|<|prefix|+offset || + // (0<=witness<|prefix| && str[witness+offset]!=prefix[witness]) implies_exprt a1( isprefix, - str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))); + str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( + prefix.length(), offset))); axioms.push_back(a1); symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); @@ -47,7 +47,8 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( qvar, prefix.length(), isprefix, - equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); + equal_exprt(str[plus_exprt_with_overflow_check(qvar, offset)], + prefix[qvar])); axioms.push_back(a2); symbol_exprt witness=fresh_exist_index("witness_not_isprefix", index_type); @@ -55,14 +56,13 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( axiom_for_is_positive_index(witness), and_exprt( prefix.axiom_for_is_strictly_longer_than(witness), - notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness]))); + notequal_exprt(str[plus_exprt_with_overflow_check(witness, offset)], + prefix[witness]))); or_exprt s0_notpref_s1( not_exprt( - str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))), - and_exprt( - witness_diff, str.axiom_for_is_longer_than( - plus_exprt(prefix.length(), offset)))); + plus_exprt_with_overflow_check(prefix.length(), offset))), + witness_diff); implies_exprt a3(not_exprt(isprefix), s0_notpref_s1); axioms.push_back(a3); @@ -88,8 +88,8 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( { const function_application_exprt::argumentst &args=f.arguments(); assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); - string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); - string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + string_exprt s0=get_string_expr(args[swap_arguments?1:0]); + string_exprt s1=get_string_expr(args[swap_arguments?0:1]); exprt offset; if(args.size()==2) offset=from_integer(0, s0.length().type()); @@ -121,7 +121,7 @@ exprt string_constraint_generatort::add_axioms_for_is_empty( // a2 : s0 => is_empty symbol_exprt is_empty=fresh_boolean("is_empty"); - string_exprt s0=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt s0=get_string_expr(args(f, 1)[0]); axioms.push_back(implies_exprt(is_empty, s0.axiom_for_has_length(0))); axioms.push_back(implies_exprt(s0.axiom_for_has_length(0), is_empty)); return typecast_exprt(is_empty, f.type()); @@ -150,8 +150,8 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( symbol_exprt issuffix=fresh_boolean("issuffix"); typecast_exprt tc_issuffix(issuffix, f.type()); - string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); - string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + string_exprt s0=get_string_expr(args[swap_arguments?1:0]); + string_exprt s1=get_string_expr(args[swap_arguments?0:1]); const typet &index_type=s0.length().type(); // We add axioms: @@ -159,7 +159,7 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( // a2 : forall witness s1[witness]=s0[witness + s0.length-s1.length] // a3 : !issuffix => - // s1.length > s0.length + // (s1.length > s0.length && witness=-1) // || (s1.length > witness>=0 // &&s1[witness]!=s0[witness + s0.length-s1.length] @@ -177,7 +177,8 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( exprt shifted=plus_exprt( witness, minus_exprt(s1.length(), s0.length())); or_exprt constr3( - s0.axiom_for_is_strictly_longer_than(s1), + and_exprt(s0.axiom_for_is_strictly_longer_than(s1), + equal_exprt(witness, from_integer(-1, index_type))), and_exprt( notequal_exprt(s0[witness], s1[shifted]), and_exprt( @@ -207,9 +208,10 @@ exprt string_constraint_generatort::add_axioms_for_contains( assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); symbol_exprt contains=fresh_boolean("contains"); typecast_exprt tc_contains(contains, f.type()); - string_exprt s0=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[1]); - const typet &index_type=s0.type(); + string_exprt s0=get_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[1]); + const refined_string_typet ref_type=to_refined_string_type(s0.type()); + const typet &index_type=ref_type.get_index_type(); // We add axioms: // a1 : contains => s0.length >= s1.length diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index 83735e0d5b5..f27fe5d4317 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -29,7 +29,7 @@ Function: string_constraint_generatort::add_axioms_for_set_length string_exprt string_constraint_generatort::add_axioms_for_set_length( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); exprt k=args(f, 2)[1]; const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt res=fresh_string(ref_type); @@ -76,7 +76,7 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( { const function_application_exprt::argumentst &args=f.arguments(); assert(args.size()>=2); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); exprt i(args[1]); exprt j; if(args.size()==3) @@ -110,7 +110,6 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( { const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &index_type=ref_type.get_index_type(); - symbol_exprt idx=fresh_exist_index("index_substring", index_type); assert(start.type()==index_type); assert(end.type()==index_type); string_exprt res=fresh_string(ref_type); @@ -133,8 +132,11 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( // Warning: check what to do if the string is not long enough axioms.push_back(str.axiom_for_is_longer_than(end)); - string_constraintt a4( - idx, res.length(), equal_exprt(res[idx], str[plus_exprt(start, idx)])); + symbol_exprt idx=fresh_univ_index("QA_index_substring", index_type); + string_constraintt a4(idx, + res.length(), + equal_exprt(res[idx], + str[plus_exprt_with_overflow_check(start, idx)])); axioms.push_back(a4); return res; } @@ -154,7 +156,7 @@ Function: string_constraint_generatort::add_axioms_for_trim string_exprt string_constraint_generatort::add_axioms_for_trim( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &index_type=ref_type.get_index_type(); string_exprt res=fresh_string(ref_type); @@ -173,7 +175,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( // a8 : forall n<|s1|, s[idx+n]=s1[n] // a9 : (s[m]>' ' &&s[m+|s1|-1]>' ') || m=|s| - exprt a1=str.axiom_for_is_longer_than(plus_exprt(idx, res.length())); + exprt a1=str.axiom_for_is_longer_than( + plus_exprt_with_overflow_check(idx, res.length())); axioms.push_back(a1); binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type)); @@ -195,7 +198,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( axioms.push_back(a6); symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type); - minus_exprt bound(str.length(), plus_exprt(idx, res.length())); + minus_exprt bound(str.length(), plus_exprt_with_overflow_check(idx, + res.length())); binary_relation_exprt eqn2( str[plus_exprt(idx, plus_exprt(res.length(), n2))], ID_le, @@ -210,7 +214,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( axioms.push_back(a8); minus_exprt index_before( - plus_exprt(idx, res.length()), from_integer(1, index_type)); + plus_exprt_with_overflow_check(idx, res.length()), + from_integer(1, index_type)); binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char); or_exprt a9( equal_exprt(idx, str.length()), @@ -236,7 +241,7 @@ Function: string_constraint_generatort::add_axioms_for_to_lower_case string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -289,7 +294,7 @@ Function: string_constraint_generatort::add_axioms_for_to_upper_case string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -345,7 +350,7 @@ Function: string_constraint_generatort::add_axioms_for_char_set string_exprt string_constraint_generatort::add_axioms_for_char_set( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); string_exprt res=fresh_string(ref_type); with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]); @@ -378,7 +383,7 @@ Function: string_constraint_generatort::add_axioms_for_replace string_exprt string_constraint_generatort::add_axioms_for_replace( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const exprt &old_char=args(f, 3)[1]; const exprt &new_char=args(f, 3)[2]; @@ -421,10 +426,12 @@ Function: string_constraint_generatort::add_axioms_for_delete_char_at string_exprt string_constraint_generatort::add_axioms_for_delete_char_at( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); exprt index_one=from_integer(1, str.length().type()); return add_axioms_for_delete( - str, args(f, 2)[1], plus_exprt(args(f, 2)[1], index_one)); + str, + args(f, 2)[1], + plus_exprt_with_overflow_check(args(f, 2)[1], index_one)); } /*******************************************************************\ @@ -468,6 +475,6 @@ Function: string_constraint_generatort::add_axioms_for_delete string_exprt string_constraint_generatort::add_axioms_for_delete( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); return add_axioms_for_delete(str, args(f, 3)[1], args(f, 3)[2]); } diff --git a/src/solvers/refinement/string_constraint_generator_valueof.cpp b/src/solvers/refinement/string_constraint_generator_valueof.cpp index d1030e5e085..10fe490259a 100644 --- a/src/solvers/refinement/string_constraint_generator_valueof.cpp +++ b/src/solvers/refinement/string_constraint_generator_valueof.cpp @@ -63,7 +63,8 @@ Function: string_constraint_generatort::add_axioms_from_float string_exprt string_constraint_generatort::add_axioms_from_float( const function_application_exprt &f) { - return add_axioms_from_float(args(f, 1)[0], false); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_float(args(f, 1)[0], ref_type, false); } /*******************************************************************\ @@ -81,7 +82,8 @@ Function: string_constraint_generatort::add_axioms_from_double string_exprt string_constraint_generatort::add_axioms_from_double( const function_application_exprt &f) { - return add_axioms_from_float(args(f, 1)[0], true); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_float(args(f, 1)[0], ref_type, true); } /*******************************************************************\ @@ -99,13 +101,12 @@ Function: string_constraint_generatort::add_axioms_from_float \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_float( - const exprt &f, bool double_precision) + const exprt &f, const refined_string_typet &ref_type, bool double_precision) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); + string_exprt res=fresh_string(ref_type); const typet &index_type=ref_type.get_index_type(); const typet &char_type=ref_type.get_char_type(); - string_exprt res=fresh_string(ref_type); - const exprt &index24=from_integer(24, ref_type.get_index_type()); + const exprt &index24=from_integer(24, index_type); axioms.push_back(res.axiom_for_is_shorter_than(index24)); string_exprt magnitude=fresh_string(ref_type); @@ -323,13 +324,22 @@ string_exprt string_constraint_generatort::add_axioms_from_int( axioms.push_back(a1); exprt chr=res[0]; - exprt starts_with_minus=equal_exprt(chr, minus_char); - exprt starts_with_digit=and_exprt( + equal_exprt starts_with_minus(chr, minus_char); + and_exprt starts_with_digit( binary_relation_exprt(chr, ID_ge, zero_char), binary_relation_exprt(chr, ID_le, nine_char)); or_exprt a2(starts_with_digit, starts_with_minus); axioms.push_back(a2); + // These are constraints to detect number that requiere the maximum number + // of digits + exprt smallest_with_max_digits= + from_integer(smallest_by_digit(max_size-1), type); + binary_relation_exprt big_negative( + i, ID_le, unary_minus_exprt(smallest_with_max_digits)); + binary_relation_exprt big_positive(i, ID_ge, smallest_with_max_digits); + or_exprt requieres_max_digits(big_negative, big_positive); + for(size_t size=1; size<=max_size; size++) { // For each possible size, we add axioms: @@ -387,13 +397,18 @@ string_exprt string_constraint_generatort::add_axioms_from_int( axioms.push_back(a6); } - // we have to be careful when exceeding the maximal size of integers + // when the size is close to the maximum, either the number is very big + // or it is negative + if(size==max_size-1) + { + implies_exprt a7(premise, or_exprt(requieres_max_digits, + starts_with_minus)); + axioms.push_back(a7); + } + // when we reach the maximal size the number is very big in the negative if(size==max_size) { - exprt smallest_with_10_digits=from_integer( - smallest_by_digit(max_size), type); - binary_relation_exprt big(i, ID_ge, smallest_with_10_digits); - implies_exprt a7(premise, big); + implies_exprt a7(premise, and_exprt(starts_with_minus, big_negative)); axioms.push_back(a7); } } @@ -597,6 +612,60 @@ string_exprt string_constraint_generatort::add_axioms_for_value_of( /*******************************************************************\ +Function: string_constraint_generatort::add_axioms_for_correct_number_format + + Inputs: function application with one string expression + + Outputs: an boolean expression + + Purpose: add axioms making the return value true if the given string is + a correct number + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_correct_number_format( + const string_exprt &str, std::size_t max_size) +{ + symbol_exprt correct=fresh_boolean("correct_number_format"); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &char_type=ref_type.get_char_type(); + const typet &index_type=ref_type.get_index_type(); + exprt zero_char=constant_char('0', char_type); + exprt nine_char=constant_char('9', char_type); + exprt minus_char=constant_char('-', char_type); + exprt plus_char=constant_char('+', char_type); + + exprt chr=str[0]; + equal_exprt starts_with_minus(chr, minus_char); + equal_exprt starts_with_plus(chr, plus_char); + and_exprt starts_with_digit( + binary_relation_exprt(chr, ID_ge, zero_char), + binary_relation_exprt(chr, ID_le, nine_char)); + + or_exprt correct_first( + or_exprt(starts_with_minus, starts_with_plus), starts_with_digit); + exprt has_first=str.axiom_for_is_longer_than(from_integer(1, index_type)); + implies_exprt a1(correct, and_exprt(has_first, correct_first)); + axioms.push_back(a1); + + exprt not_too_long=str.axiom_for_is_shorter_than(max_size); + axioms.push_back(not_too_long); + + symbol_exprt qvar=fresh_univ_index("number_format", index_type); + + and_exprt is_digit( + binary_relation_exprt(str[qvar], ID_ge, zero_char), + binary_relation_exprt(str[qvar], ID_le, nine_char)); + + string_constraintt a2( + qvar, from_integer(1, index_type), str.length(), correct, is_digit); + + axioms.push_back(a2); + return correct; +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_parse_int Inputs: function application with one string expression @@ -610,7 +679,7 @@ Function: string_constraint_generatort::add_axioms_for_parse_int exprt string_constraint_generatort::add_axioms_for_parse_int( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); const typet &type=f.type(); symbol_exprt i=fresh_symbol("parsed_int", type); const refined_string_typet &ref_type=to_refined_string_type(str.type()); @@ -626,6 +695,10 @@ exprt string_constraint_generatort::add_axioms_for_parse_int( exprt starts_with_plus=equal_exprt(chr, plus_char); exprt starts_with_digit=binary_relation_exprt(chr, ID_ge, zero_char); + // TODO: we should throw an exception when this does not hold: + exprt correct=add_axioms_for_correct_number_format(str); + axioms.push_back(correct); + for(unsigned size=1; size<=10; size++) { exprt sum=from_integer(0, type); diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index db9a2c460cc..1391ffd2315 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -11,17 +11,38 @@ Author: Alberto Griggio, alberto.griggio@gmail.com \*******************************************************************/ #include +#include #include #include #include +#include +#include #include #include #include +#include + +/*******************************************************************\ + +Constructor: string_refinementt + + Inputs: a namespace, a decision procedure, a bound on the number + of refinements and a boolean flag `concretize_result` + + Purpose: refinement_bound is a bound on the number of refinement allowed. + if `concretize_result` is set to true, at the end of the decision + procedure, the solver try to find a concrete value for each + character + +\*******************************************************************/ string_refinementt::string_refinementt( - const namespacet &_ns, propt &_prop, unsigned refinement_bound): + const namespacet &_ns, + propt &_prop, + unsigned refinement_bound): supert(_ns, _prop), use_counter_example(false), + do_concretizing(false), initial_loop_bound(refinement_bound) { } @@ -52,15 +73,27 @@ Function: string_refinementt::display_index_set void string_refinementt::display_index_set() { + std::size_t count=0; + std::size_t count_current=0; for(const auto &i : index_set) { const exprt &s=i.first; - debug() << "IS(" << from_expr(s) << ")=={"; + debug() << "IS(" << from_expr(s) << ")=={" << eom; for(auto j : i.second) - debug() << from_expr(j) << "; "; + { + if(current_index_set[i.first].find(j)!=current_index_set[i.first].end()) + { + count_current++; + debug() << "**"; + } + debug() << " " << from_expr(j) << ";" << eom; + count++; + } debug() << "}" << eom; } + debug() << count << " elements in index set (" << count_current + << " newly added)" << eom; } /*******************************************************************\ @@ -100,152 +133,320 @@ void string_refinementt::add_instantiations() /*******************************************************************\ -Function: string_refinementt::convert_rest - - Inputs: an expression +Function: string_refinementt::add_symbol_to_symbol_map() - Outputs: a literal + Inputs: a symbol and the expression to map it to - Purpose: if the expression is a function application, we convert it - using our own convert_function_application method + Purpose: keeps a map of symbols to expressions, such as none of the + mapped values exist as a key \*******************************************************************/ -literalt string_refinementt::convert_rest(const exprt &expr) +void string_refinementt::add_symbol_to_symbol_map +(const exprt &lhs, const exprt &rhs) { - if(expr.id()==ID_function_application) + assert(lhs.id()==ID_symbol); + + // We insert the mapped value of the rhs, if it exists. + auto it=symbol_resolve.find(rhs); + const exprt &new_rhs=it!=symbol_resolve.end()?it->second:rhs; + + symbol_resolve[lhs]=new_rhs; + reverse_symbol_resolve[new_rhs].push_back(lhs); + + std::list symbols_to_update_with_new_rhs(reverse_symbol_resolve[rhs]); + for(exprt item : symbols_to_update_with_new_rhs) { - // can occur in __CPROVER_assume - bvt bv=convert_function_application(to_function_application_expr(expr)); - assert(bv.size()==1); - return bv[0]; + symbol_resolve[item]=new_rhs; + reverse_symbol_resolve[new_rhs].push_back(item); } - else +} + +/*******************************************************************\ + +Function: string_refinementt::set_char_array_equality() + + Inputs: the rhs and lhs of an equality over character arrays + + Purpose: add axioms if the rhs is a character array + +\*******************************************************************/ + +void string_refinementt::set_char_array_equality( + const exprt &lhs, const exprt &rhs) +{ + assert(lhs.id()==ID_symbol); + + if(rhs.id()==ID_array && rhs.type().id()==ID_array) { - return supert::convert_rest(expr); + const typet &index_type=to_array_type(rhs.type()).size().type(); + for(size_t i=0, ilim=rhs.operands().size(); i!=ilim; ++i) + { + // Introduce axioms to map symbolic rhs to its char array. + index_exprt arraycell(rhs, from_integer(i, index_type)); + equal_exprt arrayeq(arraycell, rhs.operands()[i]); + add_lemma(arrayeq, false); +#if 0 + generator.axioms.push_back(arrayeq); +#endif + } } + // At least for Java (as it is currently pre-processed), we need not consider + // other cases, because all character arrays find themselves on the rhs of an + // equality. Note that this might not be the case for other languages. } /*******************************************************************\ -Function: string_refinementt::convert_symbol +Function: string_refinementt::substitute_function_applications() - Inputs: an expression + Inputs: an expression containing function applications - Outputs: a bitvector + Outputs: an epression containing no function application - Purpose: if the expression as string type, look up for the string in the - list of string symbols that we maintain, and convert it; - otherwise use the method of the parent class + Purpose: remove functions applications and create the necessary + axioms \*******************************************************************/ -bvt string_refinementt::convert_symbol(const exprt &expr) +exprt string_refinementt::substitute_function_applications(exprt expr) { - const typet &type=expr.type(); - const irep_idt &identifier=expr.get(ID_identifier); - assert(!identifier.empty()); + for(size_t i=0; iMAX_CONCRETE_STRING_SIZE? + MAX_CONCRETE_STRING_SIZE:concretize_limit; + exprt content_expr=str.content(); + replace_expr(current_model, content_expr); + for(size_t i=0; i &pair : non_string_axioms) + { + replace_expr(symbol_resolve, pair.first); + debug() << "super::set_to " << from_expr(pair.first) << eom; + supert::set_to(pair.first, pair.second); + } + + for(exprt &axiom : generator.axioms) + { + replace_expr(symbol_resolve, axiom); if(axiom.id()==ID_string_constraint) { string_constraintt c=to_string_constraint(axiom); @@ -282,6 +494,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() { add_lemma(axiom); } + } initial_index_set(universal_axioms); update_index_set(cur); @@ -302,6 +515,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() else { debug() << "check_SAT: the model is correct" << eom; + concretize_lengths(); return D_SATISFIABLE; } @@ -318,7 +532,13 @@ decision_proceduret::resultt string_refinementt::dec_solve() if(current_index_set.empty()) { debug() << "current index set is empty" << eom; - return D_SATISFIABLE; + if(do_concretizing) + { + concretize_results(); + do_concretizing=false; + } + else + return D_SATISFIABLE; } display_index_set(); @@ -367,131 +587,337 @@ bvt string_refinementt::convert_bool_bv(const exprt &boole, const exprt &orig) Function: string_refinementt::add_lemma - Inputs: a lemma + Inputs: a lemma and Boolean value stating whether the lemma should + be added to the index set. Purpose: add the given lemma to the solver \*******************************************************************/ -void string_refinementt::add_lemma(const exprt &lemma, bool add_to_index_set) +void string_refinementt::add_lemma( + const exprt &lemma, bool _simplify, bool add_to_index_set) { if(!seen_instances.insert(lemma).second) return; - if(lemma.is_true()) + if(add_to_index_set) + cur.push_back(lemma); + + exprt simple_lemma=lemma; + if(_simplify) + simplify(simple_lemma, ns); + + if(simple_lemma.is_true()) { +#if 0 debug() << "string_refinementt::add_lemma : tautology" << eom; +#endif return; } - debug() << "adding lemma " << from_expr(lemma) << eom; + debug() << "adding lemma " << from_expr(simple_lemma) << eom; - prop.l_set_to_true(convert(lemma)); - if(add_to_index_set) - cur.push_back(lemma); + prop.l_set_to_true(convert(simple_lemma)); } /*******************************************************************\ -Function: string_refinementt::string_of_array +Function: string_refinementt::get_array - Inputs: a constant array expression and a integer expression + Inputs: an expression representing an array and an expression + representing an integer - Outputs: a string + Outputs: an array expression or an array_of_exprt - Purpose: convert the content of a string to a more readable representation. - This should only be used for debbuging. + Purpose: get a model of an array and put it in a certain form. + If the size cannot be obtained or if it is too big, return an + empty array. \*******************************************************************/ -std::string string_refinementt::string_of_array( - const exprt &arr, const exprt &size) const +exprt string_refinementt::get_array(const exprt &arr, const exprt &size) const { - if(size.id()!=ID_constant) - return "string of unknown size"; + exprt arr_val=get_array(arr); + exprt size_val=supert::get(size); + size_val=simplify_expr(size_val, ns); + typet char_type=arr.type().subtype(); + typet index_type=size.type(); + array_typet empty_ret_type(char_type, from_integer(0, index_type)); + array_of_exprt empty_ret(from_integer(0, char_type), empty_ret_type); + + if(size_val.id()!=ID_constant) + { +#if 0 + debug() << "(sr::get_array) string of unknown size: " + << from_expr(size_val) << eom; +#endif + return empty_ret; + } + unsigned n; - if(to_unsigned_integer(to_constant_expr(size), n)) - n=0; + if(to_unsigned_integer(to_constant_expr(size_val), n)) + { +#if 0 + debug() << "(sr::get_array) size is not valid" << eom; +#endif + return empty_ret; + } + + array_typet ret_type(char_type, from_integer(n, index_type)); + array_exprt ret(ret_type); if(n>MAX_CONCRETE_STRING_SIZE) - return "very long string"; + { +#if 0 + debug() << "(sr::get_array) long string (size=" << n << ")" << eom; +#endif + return empty_ret; + } + if(n==0) - return "\"\""; + { +#if 0 + debug() << "(sr::get_array) empty string" << eom; +#endif + return empty_ret; + } - std::ostringstream buf; - buf << "\""; - exprt val=get(arr); + std::vector concrete_array(n); - if(val.id()=="array-list") + if(arr_val.id()=="array-list") { - for(size_t i=0; i(c); + exprt value=arr_val.operands()[i*2+1]; + to_unsigned_integer(to_constant_expr(value), concrete_array[idx]); } } } } + else if(arr_val.id()==ID_array) + { + for(size_t i=0; i=32) + result << (unsigned char) c; + else + { + result << "\\u" << std::hex << std::setw(4) << std::setfill('0') + << (unsigned int) c; + } + } + + return result.str(); +} + +/*******************************************************************\ - for(size_t i=0; i &m, bool negated) const + std::map &m, const typet &type, bool negated) const { exprt sum=nil_exprt(); mp_integer constants=0; typet index_type; if(m.empty()) - return nil_exprt(); + return from_integer(0, type); else index_type=m.begin()->first.type(); @@ -723,12 +1122,20 @@ exprt string_refinementt::sum_over_map( default: if(second>1) { - for(int i=0; isecond; i--) + if(sum.is_nil()) + sum=unary_minus_exprt(t); + else + sum=minus_exprt(sum, t); + for(int i=-1; i>second; i--) sum=minus_exprt(sum, t); } } @@ -755,7 +1162,7 @@ Function: string_refinementt::simplify_sum exprt string_refinementt::simplify_sum(const exprt &f) const { std::map map=map_representation_of_sum(f); - return sum_over_map(map); + return sum_over_map(map, f.type()); } /*******************************************************************\ @@ -800,7 +1207,7 @@ exprt string_refinementt::compute_inverse_function( } elems.erase(it); - return sum_over_map(elems, neg); + return sum_over_map(elems, f.type(), neg); } @@ -830,7 +1237,7 @@ Function: find_qvar Outputs: a Boolean - Purpose: looks for the symbol and return true if it is found + Purpose: look for the symbol and return true if it is found \*******************************************************************/ @@ -885,6 +1292,26 @@ Function: string_refinementt::initial_index_set and the upper bound minus one \*******************************************************************/ +void string_refinementt::add_to_index_set(const exprt &s, exprt i) +{ + simplify(i, ns); + if(i.id()==ID_constant) + { + mp_integer mpi; + to_integer(i, mpi); + if(mpi<0) + { + debug() << "add_to_index_set : ignoring negative number " << mpi << eom; + return; + } + } + if(index_set[s].insert(i).second) + { + debug() << "adding to index set of " << from_expr(s) + << ": " << from_expr(i) << eom; + current_index_set[s].insert(i); + } +} void string_refinementt::initial_index_set(const string_constraintt &axiom) { @@ -906,8 +1333,7 @@ void string_refinementt::initial_index_set(const string_constraintt &axiom) // if cur is of the form s[i] and no quantified variable appears in i if(!has_quant_var) { - current_index_set[s].insert(i); - index_set[s].insert(i); + add_to_index_set(s, i); } else { @@ -917,8 +1343,7 @@ void string_refinementt::initial_index_set(const string_constraintt &axiom) axiom.upper_bound(), from_integer(1, axiom.upper_bound().type())); replace_expr(qvar, kminus1, e); - current_index_set[s].insert(e); - index_set[s].insert(e); + add_to_index_set(s, e); } } else @@ -952,12 +1377,7 @@ void string_refinementt::update_index_set(const exprt &formula) const exprt &i=cur.op1(); assert(s.type().id()==ID_array); exprt simplified=simplify_sum(i); - if(index_set[s].insert(simplified).second) - { - debug() << "adding to index set of " << from_expr(s) - << ": " << from_expr(simplified) << eom; - current_index_set[s].insert(simplified); - } + add_to_index_set(s, simplified); } else { @@ -1014,19 +1434,18 @@ exprt find_index(const exprt &expr, const exprt &str) catch (exprt i) { return i; } } - /*******************************************************************\ Function: string_refinementt::instantiate - Inputs: an universaly quantified formula `axiom`, an array of char + Inputs: a universally quantified formula `axiom`, an array of char variable `str`, and an index expression `val`. - Outputs: substitute `qvar` the universaly quantified variable of `axiom`, by + Outputs: substitute `qvar` the universally quantified variable of `axiom`, by an index `val`, in `axiom`, so that the index used for `str` equals `val`. For instance, if `axiom` corresponds to - $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` would return - an expression for $s[v]='a' && t[v-x]='b'$. + $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` + would return an expression for $s[v]='a' && t[v-x]='b'$. \*******************************************************************/ @@ -1051,6 +1470,17 @@ exprt string_refinementt::instantiate( return implies_exprt(bounds, instance); } +/*******************************************************************\ + +Function: string_refinementt::instantiate_not_contains + + Inputs: a quantified formula representing `not_contains`, and a + list to which to add the created lemmas to + + Purpose: instantiate a quantified formula representing `not_contains` + by substituting the quantifiers and generating axioms + +\*******************************************************************/ void string_refinementt::instantiate_not_contains( const string_not_contains_constraintt &axiom, std::list &new_lemmas) @@ -1102,3 +1532,76 @@ void string_refinementt::instantiate_not_contains( new_lemmas.push_back(witness_bounds); } } + +/*******************************************************************\ + +Function: string_refinementt::substitute_array_lists() + + Inputs: an expression containing array-list expressions + + Outputs: an epression containing no array-list + + Purpose: replace array-lists by 'with' expressions + +\*******************************************************************/ + +exprt string_refinementt::substitute_array_lists(exprt expr) const +{ + for(size_t i=0; i=2); + typet &char_type=expr.operands()[1].type(); + array_typet arr_type(char_type, infinity_exprt(char_type)); + array_of_exprt new_arr(from_integer(0, char_type), + arr_type); + + with_exprt ret_expr(new_arr, + expr.operands()[0], + expr.operands()[1]); + + for(size_t i=2; isecond); + } + + ecopy=supert::get(ecopy); + + return substitute_array_lists(ecopy); +} diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index be0cd332bc3..252146f4620 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -14,6 +14,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #define CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H #include +#include #include #include @@ -27,30 +28,32 @@ Author: Alberto Griggio, alberto.griggio@gmail.com class string_refinementt: public bv_refinementt { public: - // refinement_bound is a bound on the number of refinement allowed string_refinementt( - const namespacet &_ns, propt &_prop, unsigned refinement_bound); + const namespacet &_ns, + propt &_prop, + unsigned refinement_bound); void set_mode(); // Should we use counter examples at each iteration? bool use_counter_example; - virtual std::string decision_procedure_text() const + bool do_concretizing; + + virtual std::string decision_procedure_text() const override { return "string refinement loop with "+prop.solver_text(); } static exprt is_positive(const exprt &x); + exprt get(const exprt &expr) const override; + protected: typedef std::set expr_sett; + typedef std::list exprt_listt; - virtual bvt convert_symbol(const exprt &expr); - virtual bvt convert_function_application( - const function_application_exprt &expr); - - decision_proceduret::resultt dec_solve(); + decision_proceduret::resultt dec_solve() override; bvt convert_bool_bv(const exprt &boole, const exprt &orig); @@ -60,6 +63,9 @@ class string_refinementt: public bv_refinementt unsigned initial_loop_bound; + // Is the current model correct + bool concrete_model; + string_constraint_generatort generator; // Simple constraints that have been given to the solver @@ -76,23 +82,42 @@ class string_refinementt: public bv_refinementt // Warning: this is indexed by array_expressions and not string expressions std::map current_index_set; std::map index_set; + replace_mapt symbol_resolve; + std::map reverse_symbol_resolve; + std::list> non_string_axioms; - void display_index_set(); + // Valuation in the current model of the symbols that have been created + // by the solver + replace_mapt current_model; - void add_lemma(const exprt &lemma, bool add_to_index_set=true); + void add_equivalence(const irep_idt & lhs, const exprt & rhs); - bool boolbv_set_equality_to_true(const equal_exprt &expr); + void display_index_set(); - literalt convert_rest(const exprt &expr); + void add_lemma(const exprt &lemma, + bool simplify=true, + bool add_to_index_set=true); - void add_instantiations(); + exprt substitute_function_applications(exprt expr); + typet substitute_java_string_types(typet type); + exprt substitute_java_strings(exprt expr); + void add_symbol_to_symbol_map(const exprt &lhs, const exprt &rhs); + bool is_char_array(const typet &type) const; + bool add_axioms_for_string_assigns(const exprt &lhs, const exprt &rhs); + void set_to(const exprt &expr, bool value) override; + void add_instantiations(); + void add_negation_of_constraint_to_solver( + const string_constraintt &axiom, supert &solver); + void fill_model(); bool check_axioms(); + void set_char_array_equality(const exprt &lhs, const exprt &rhs); void update_index_set(const exprt &formula); void update_index_set(const std::vector &cur); void initial_index_set(const string_constraintt &axiom); void initial_index_set(const std::vector &string_axioms); + void add_to_index_set(const exprt &s, exprt i); exprt instantiate( const string_constraintt &axiom, const exprt &str, const exprt &val); @@ -101,17 +126,26 @@ class string_refinementt: public bv_refinementt const string_not_contains_constraintt &axiom, std::list &new_lemmas); + exprt substitute_array_lists(exprt) const; + exprt compute_inverse_function( const exprt &qvar, const exprt &val, const exprt &f); std::map map_representation_of_sum(const exprt &f) const; - exprt sum_over_map(std::map &m, bool negated=false) const; + exprt sum_over_map( + std::map &m, const typet &type, bool negated=false) const; exprt simplify_sum(const exprt &f) const; - exprt get_array(const exprt &arr, const exprt &size); + void concretize_results(); + void concretize_lengths(); + // Length of char arrays found during concretization + std::map found_length; + + exprt get_array(const exprt &arr, const exprt &size) const; + exprt get_array(const exprt &arr) const; - std::string string_of_array(const exprt &arr, const exprt &size) const; + std::string string_of_array(const array_exprt &arr); }; #endif From 03c56603d6f7ebe814e27611d35f21bf8069eb47 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 22 Mar 2017 15:05:40 +0000 Subject: [PATCH 10/15] Need to assign length before calls since it can be overwritten by functions with side-effect --- src/goto-programs/string_refine_preprocess.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index 8403d4bf044..7243f66049a 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -361,7 +361,16 @@ exprt string_refine_preprocesst::make_cprover_string_assign( std::list assignments; // 1) cprover_string_length= *(rhs->length) + symbolt sym_length=get_fresh_aux_symbol( + length_type, + "cprover_string_length", + "cprover_string_length", + location, + ID_java, + symbol_table); + symbol_exprt cprover_length=sym_length.symbol_expr(); member_exprt length(deref, "length", length_type); + assignments.emplace_back(cprover_length, length); // 2) cprover_string_array = *(rhs->data) symbol_exprt array_lhs=fresh_array(data_type.subtype(), location); @@ -373,7 +382,7 @@ exprt string_refine_preprocesst::make_cprover_string_assign( // This assignment is useful for finding witnessing strings for counter // examples refined_string_typet ref_type(length_type, java_char_type()); - string_exprt new_rhs(length, array_lhs, ref_type); + string_exprt new_rhs(cprover_length, array_lhs, ref_type); symbol_exprt lhs=fresh_string(new_rhs.type(), location); assignments.emplace_back(lhs, new_rhs); From b01c356765a75036218eb9839beee4fc1ba73682 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 27 Feb 2017 10:40:20 +0000 Subject: [PATCH 11/15] Adding refined_string_type to util Makefile --- src/util/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/util/Makefile b/src/util/Makefile index fefabaed3af..0bfb2539af4 100644 --- a/src/util/Makefile +++ b/src/util/Makefile @@ -23,7 +23,9 @@ SRC = arith_tools.cpp base_type.cpp cmdline.cpp config.cpp symbol_table.cpp \ memory_info.cpp pipe_stream.cpp irep_hash.cpp endianness_map.cpp \ ssa_expr.cpp json_irep.cpp json_expr.cpp \ fresh_symbol.cpp \ - string_utils.cpp + string_utils.cpp \ + refined_string_type.cpp \ + #Empty last line INCLUDES= -I .. From bdabff79a42c991b282f9b46b899596b52feca45 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 10 Jan 2017 13:42:48 +0000 Subject: [PATCH 12/15] Adding string preprocessing of goto-programs to Makefile --- src/goto-programs/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/goto-programs/Makefile b/src/goto-programs/Makefile index d5cc5ea25eb..f23a0e387ed 100644 --- a/src/goto-programs/Makefile +++ b/src/goto-programs/Makefile @@ -20,7 +20,8 @@ SRC = goto_convert.cpp goto_convert_function_call.cpp \ slice_global_inits.cpp goto_inline_class.cpp class_identifier.cpp \ show_goto_functions_json.cpp \ show_goto_functions_xml.cpp \ - remove_static_init_loops.cpp remove_instanceof.cpp + remove_static_init_loops.cpp remove_instanceof.cpp \ + string_refine_preprocess.cpp INCLUDES= -I .. From 1ca18b51cbb46fbbf481f384f05fcfcbdf46f0bf Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 10 Jan 2017 13:43:30 +0000 Subject: [PATCH 13/15] Adding string solver cpp files to Makefile --- src/solvers/Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/solvers/Makefile b/src/solvers/Makefile index 95ea7650274..136db5765c1 100644 --- a/src/solvers/Makefile +++ b/src/solvers/Makefile @@ -118,6 +118,17 @@ SRC = $(CHAFF_SRC) $(BOOLEFORCE_SRC) $(MINISAT_SRC) $(MINISAT2_SRC) \ floatbv/float_utils.cpp floatbv/float_bv.cpp \ refinement/bv_refinement_loop.cpp refinement/refine_arithmetic.cpp \ refinement/refine_arrays.cpp \ + refinement/string_refinement.cpp \ + refinement/string_constraint_generator_code_points.cpp \ + refinement/string_constraint_generator_comparison.cpp \ + refinement/string_constraint_generator_concat.cpp \ + refinement/string_constraint_generator_constants.cpp \ + refinement/string_constraint_generator_indexof.cpp \ + refinement/string_constraint_generator_insert.cpp \ + refinement/string_constraint_generator_main.cpp \ + refinement/string_constraint_generator_testing.cpp \ + refinement/string_constraint_generator_transformation.cpp \ + refinement/string_constraint_generator_valueof.cpp \ miniBDD/miniBDD.cpp INCLUDES += -I .. \ From 2b83d4832994bb19768e8b77146fd2c07ca211aa Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 10 Jan 2017 13:36:33 +0000 Subject: [PATCH 14/15] Adding the string refinement option to the CBMC solvers We add the option `--string-refine` which can be used to activate the string solver, in order to deal precisely with string functions. --- src/cbmc/cbmc_parse_options.cpp | 15 +++++++++++++++ src/cbmc/cbmc_parse_options.h | 1 + src/cbmc/cbmc_solvers.cpp | 24 ++++++++++++++++++++++++ src/cbmc/cbmc_solvers.h | 13 ++++++++----- 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index e33e700e0ca..b6a96d1e178 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -20,6 +20,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -310,6 +311,11 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) options.set_option("refine-arithmetic", true); } + if(cmdline.isset("string-refine")) + { + options.set_option("string-refine", true); + } + if(cmdline.isset("max-node-refinement")) options.set_option( "max-node-refinement", @@ -904,6 +910,14 @@ bool cbmc_parse_optionst::process_goto_program( status() << "Partial Inlining" << eom; goto_partial_inline(goto_functions, ns, ui_message_handler); + + if(cmdline.isset("string-refine")) + { + status() << "Preprocessing for string refinement" << eom; + string_refine_preprocesst( + symbol_table, goto_functions, ui_message_handler); + } + // remove returns, gcc vectors, complex remove_returns(symbol_table, goto_functions); remove_vector(symbol_table, goto_functions); @@ -1191,6 +1205,7 @@ void cbmc_parse_optionst::help() " --yices use Yices\n" " --z3 use Z3\n" " --refine use refinement procedure (experimental)\n" + " --string-refine use string refinement (experimental)\n" " --outfile filename output formula to given file\n" " --arrays-uf-never never turn arrays into uninterpreted functions\n" // NOLINT(*) " --arrays-uf-always always turn arrays into uninterpreted functions\n" // NOLINT(*) diff --git a/src/cbmc/cbmc_parse_options.h b/src/cbmc/cbmc_parse_options.h index a3fc5e7e7d3..c6ed41c1e6a 100644 --- a/src/cbmc/cbmc_parse_options.h +++ b/src/cbmc/cbmc_parse_options.h @@ -37,6 +37,7 @@ class optionst; "(no-sat-preprocessor)" \ "(no-pretty-names)(beautify)" \ "(dimacs)(refine)(max-node-refinement):(refine-arrays)(refine-arithmetic)"\ + "(string-refine)" \ "(aig)(16)(32)(64)(LP64)(ILP64)(LLP64)(ILP32)(LP32)" \ "(little-endian)(big-endian)" \ "(show-goto-functions)(show-loops)" \ diff --git a/src/cbmc/cbmc_solvers.cpp b/src/cbmc/cbmc_solvers.cpp index ccd660dc586..a72546a8d0c 100644 --- a/src/cbmc/cbmc_solvers.cpp +++ b/src/cbmc/cbmc_solvers.cpp @@ -14,6 +14,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -213,6 +214,29 @@ cbmc_solverst::solvert* cbmc_solverst::get_bv_refinement() /*******************************************************************\ +Function: cbmc_solverst::get_string_refinement + + Outputs: a solver for cbmc + + Purpose: the string refinement adds to the bit vector refinement + specifications for functions from the Java string library + +\*******************************************************************/ + +cbmc_solverst::solvert* cbmc_solverst::get_string_refinement() +{ + propt *prop; + prop=new satcheck_no_simplifiert(); + prop->set_message_handler(get_message_handler()); + + string_refinementt *string_refinement=new string_refinementt( + ns, *prop, MAX_NB_REFINEMENT); + string_refinement->set_ui(ui); + return new solvert(string_refinement, prop); +} + +/*******************************************************************\ + Function: cbmc_solverst::get_smt1 Inputs: diff --git a/src/cbmc/cbmc_solvers.h b/src/cbmc/cbmc_solvers.h index 42d47fcaed3..be4a0e0ccde 100644 --- a/src/cbmc/cbmc_solvers.h +++ b/src/cbmc/cbmc_solvers.h @@ -111,15 +111,17 @@ class cbmc_solverst:public messaget solvert *solver; if(options.get_bool_option("dimacs")) - solver = get_dimacs(); + solver=get_dimacs(); else if(options.get_bool_option("refine")) - solver = get_bv_refinement(); + solver=get_bv_refinement(); + else if(options.get_bool_option("string-refine")) + solver=get_string_refinement(); else if(options.get_bool_option("smt1")) - solver = get_smt1(get_smt1_solver_type()); + solver=get_smt1(get_smt1_solver_type()); else if(options.get_bool_option("smt2")) - solver = get_smt2(get_smt2_solver_type()); + solver=get_smt2(get_smt2_solver_type()); else - solver = get_default(); + solver=get_default(); return std::unique_ptr(solver); } @@ -141,6 +143,7 @@ class cbmc_solverst:public messaget solvert *get_default(); solvert *get_dimacs(); solvert *get_bv_refinement(); + solvert *get_string_refinement(); solvert *get_smt1(smt1_dect::solvert solver); solvert *get_smt2(smt2_dect::solvert solver); From fb91e6d0274336d3601b2a4a41dc07b75209818a Mon Sep 17 00:00:00 2001 From: Joel Allred Date: Thu, 23 Mar 2017 18:18:14 +0000 Subject: [PATCH 15/15] Change option name to --refine-strings --- src/cbmc/cbmc_parse_options.cpp | 8 ++++---- src/cbmc/cbmc_parse_options.h | 2 +- src/cbmc/cbmc_solvers.h | 2 +- src/java_bytecode/java_bytecode_language.cpp | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index b6a96d1e178..b90afbb307e 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -311,9 +311,9 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) options.set_option("refine-arithmetic", true); } - if(cmdline.isset("string-refine")) + if(cmdline.isset("refine-strings")) { - options.set_option("string-refine", true); + options.set_option("refine-strings", true); } if(cmdline.isset("max-node-refinement")) @@ -911,7 +911,7 @@ bool cbmc_parse_optionst::process_goto_program( goto_partial_inline(goto_functions, ns, ui_message_handler); - if(cmdline.isset("string-refine")) + if(cmdline.isset("refine-strings")) { status() << "Preprocessing for string refinement" << eom; string_refine_preprocesst( @@ -1205,7 +1205,7 @@ void cbmc_parse_optionst::help() " --yices use Yices\n" " --z3 use Z3\n" " --refine use refinement procedure (experimental)\n" - " --string-refine use string refinement (experimental)\n" + " --refine-strings use string refinement (experimental)\n" " --outfile filename output formula to given file\n" " --arrays-uf-never never turn arrays into uninterpreted functions\n" // NOLINT(*) " --arrays-uf-always always turn arrays into uninterpreted functions\n" // NOLINT(*) diff --git a/src/cbmc/cbmc_parse_options.h b/src/cbmc/cbmc_parse_options.h index c6ed41c1e6a..266984c5c17 100644 --- a/src/cbmc/cbmc_parse_options.h +++ b/src/cbmc/cbmc_parse_options.h @@ -37,7 +37,7 @@ class optionst; "(no-sat-preprocessor)" \ "(no-pretty-names)(beautify)" \ "(dimacs)(refine)(max-node-refinement):(refine-arrays)(refine-arithmetic)"\ - "(string-refine)" \ + "(refine-strings)" \ "(aig)(16)(32)(64)(LP64)(ILP64)(LLP64)(ILP32)(LP32)" \ "(little-endian)(big-endian)" \ "(show-goto-functions)(show-loops)" \ diff --git a/src/cbmc/cbmc_solvers.h b/src/cbmc/cbmc_solvers.h index be4a0e0ccde..c837bd55806 100644 --- a/src/cbmc/cbmc_solvers.h +++ b/src/cbmc/cbmc_solvers.h @@ -114,7 +114,7 @@ class cbmc_solverst:public messaget solver=get_dimacs(); else if(options.get_bool_option("refine")) solver=get_bv_refinement(); - else if(options.get_bool_option("string-refine")) + else if(options.get_bool_option("refine-strings")) solver=get_string_refinement(); else if(options.get_bool_option("smt1")) solver=get_smt1(get_smt1_solver_type()); diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 0e2eb25d0ec..c2a817430bc 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -44,7 +44,7 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) { disable_runtime_checks=cmd.isset("disable-runtime-check"); assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null"); - string_refinement_enabled=cmd.isset("string-refine"); + string_refinement_enabled=cmd.isset("refine-strings"); if(cmd.isset("java-max-input-array-length")) max_nondet_array_length= std::stoi(cmd.get_value("java-max-input-array-length"));