From 72ad6ad81ee9e8477390ed64f9836a671fdbc4c7 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 7 Feb 2017 11:11:47 +0000 Subject: [PATCH 1/5] Making add_string_type more generic The class_name is now passed as argument so that we can reuse this function for StringBuilder and other "String-like" classes. --- .../java_bytecode_convert_class.cpp | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index f21b0bb1ce2..7f52c6ef77b 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -50,7 +50,16 @@ class java_bytecode_convert_classt:public messaget convert(parse_tree.parsed_class); else if(string_refinement_enabled && parse_tree.parsed_class.name=="java.lang.String") - add_string_type(); + add_string_type("java.lang.String"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.StringBuilder") + add_string_type("java.lang.StringBuilder"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.CharSequence") + add_string_type("java.lang.CharSequence"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.StringBuffer") + add_string_type("java.lang.StringBuffer"); else generate_class_stub(parse_tree.parsed_class.name); } @@ -72,7 +81,7 @@ class java_bytecode_convert_classt:public messaget void generate_class_stub(const irep_idt &class_name); void add_array_types(); - void add_string_type(); + void add_string_type(const irep_idt &class_name); }; /*******************************************************************\ @@ -406,15 +415,17 @@ bool java_bytecode_convert_class( Function: java_bytecode_convert_classt::add_string_type + Inputs: a name for the class such as "java.lang.String" + Purpose: Implements the java.lang.String type in the case that we provide an internal implementation. \*******************************************************************/ -void java_bytecode_convert_classt::add_string_type() +void java_bytecode_convert_classt::add_string_type(const irep_idt &class_name) { class_typet string_type; - string_type.set_tag("java.lang.String"); + string_type.set_tag(class_name); string_type.components().resize(3); string_type.components()[0].set_name("@java.lang.Object"); string_type.components()[0].set_pretty_name("@java.lang.Object"); @@ -432,8 +443,8 @@ void java_bytecode_convert_classt::add_string_type() string_type.add_base(symbol_typet("java::java.lang.Object")); symbolt string_symbol; - string_symbol.name="java::java.lang.String"; - string_symbol.base_name="java.lang.String"; + string_symbol.name="java::"+id2string(class_name); + string_symbol.base_name=id2string(class_name); string_symbol.type=string_type; string_symbol.is_type=true; @@ -445,8 +456,8 @@ void java_bytecode_convert_classt::add_string_type() symbolt string_equals_symbol; string_equals_symbol.name= "java::java.lang.String.equals:(Ljava/lang/Object;)Z"; - string_equals_symbol.base_name="java.lang.String.equals"; - string_equals_symbol.pretty_name="java.lang.String.equals"; + string_equals_symbol.base_name=id2string(class_name)+".equals"; + string_equals_symbol.pretty_name=id2string(class_name)+".equals"; string_equals_symbol.mode=ID_java; code_typet string_equals_type; From c9ff379f11992cafc7a5be00875802f0bd6c7ef5 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 9 Mar 2017 15:02:08 +0000 Subject: [PATCH 2/5] new identifier for converting char pointer to array --- src/util/irep_ids.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util/irep_ids.txt b/src/util/irep_ids.txt index b9d2d253538..1958522eaee 100644 --- a/src/util/irep_ids.txt +++ b/src/util/irep_ids.txt @@ -744,6 +744,7 @@ string_constraint string_not_contains_constraint cprover_char_literal_func cprover_string_literal_func +cprover_string_array_of_char_pointer_func cprover_string_char_at_func cprover_string_char_set_func cprover_string_code_point_at_func From a04dc21fc7d02eafb8fd4d792536707084acf93d Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 17 Feb 2017 13:18:54 +0000 Subject: [PATCH 3/5] Many corrections in preprocessing of strings Better signatures and string detection Better detection of string-like types and handling of char arrays Changing the way we deal with string initialisation from array Factorized part of type recognition and added StringBuilder and CharSequence to the list of java classes that should be considered as string. Changed the way we deal with StringBuilders: instead of having a map we add an assignment to the instructions. We also detect char arrays and handle them better. We now use substring and copy functions for initialisation from char array since the argument are always transformed into refined strings. For each string returned by a string function we also add into the java_string_to_cprover_string map a string_exprt. Corrected detection of typecast in make_cprover_string_assign Ensuring refined_string arguments of function applications are string_exprt Correct string_refine_preprocesst constructor. Order of initialisation is now the same as the order of declaration. This was picked up by g++ and clang. Added signatures for some StringBuilder functions. Removed map java_to_cprover_string and adapt signature for side effects. The usage of a map is not correct since strings can be modified by side effects. Signature is necessary for StringBuilders to be assigned in the right way with methods with side effects. Assign all string_exprt to refined string symbols in preprocessing. This makes it then easier to debug and to find witnesses for counter examples in the solver. Make signatures take priority over actual type and add signature for intern. Linting corrections Adding malloc for char arrays for String.toCharArray Fixing preprocessing of string function with side effect This fixes problems we were getting with some StringBuilder functions. The return value should contain a pointer to the original StringBuilder and the fields of the StringBuilder should be filled with the result of the function call. Corrected mistake in preprocessing of string functions with side effects char array assignements returns a string_exprt This is to be uniform with other preprocessing functions also returning string_exprt Preprocessing for StringBuilder.append on char array Corrected update of signature and commented out some unused code Corrected the initialization from char array We cannot use the substring function because the meaning of the third argument is different between `String.([CII)` and `String.substring(SII)` Using new id for conversion between char pointer and array Cleaning of preprocessing for strings Removed useless functions and merged some maps Make a copy of the function call to be modified Removed redeclaration of location --- .../string_refine_preprocess.cpp | 797 +++++++++--------- src/goto-programs/string_refine_preprocess.h | 71 +- 2 files changed, 416 insertions(+), 452 deletions(-) diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index ec02988d577..8403d4bf044 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -23,6 +23,32 @@ Date: September 2016 /*******************************************************************\ +Function: string_refine_preprocesst::check_java_type + + Inputs: a type and a string + + Outputs: Boolean telling whether the type is a struct with the given + tag or a symbolic type with the tag prefixed by "java::" + +\*******************************************************************/ + +bool string_refine_preprocesst::check_java_type( + const typet &type, const std::string &tag) +{ + if(type.id()==ID_symbol) + { + irep_idt tag_id=to_symbol_type(type).get_identifier(); + return tag_id=="java::"+tag; + } + else if(type.id()==ID_struct) + { + irep_idt tag_id=to_struct_type(type).get_tag(); + return tag_id==tag; + } + return false; +} +/*******************************************************************\ + Function: string_refine_preprocesst::is_java_string_pointer_type Inputs: a type @@ -54,17 +80,7 @@ Function: string_refine_preprocesst::is_java_string_type bool string_refine_preprocesst::is_java_string_type(const typet &type) { - if(type.id()==ID_symbol) - { - const irep_idt &tag=to_symbol_type(type).get_identifier(); - return tag=="java::java.lang.String"; - } - else if(type.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(type).get_tag(); - return tag=="java.lang.String"; - } - return false; + return check_java_type(type, "java.lang.String"); } /*******************************************************************\ @@ -79,17 +95,7 @@ Function: string_refine_preprocesst::is_java_string_builder_type bool string_refine_preprocesst::is_java_string_builder_type(const typet &type) { - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.StringBuilder"; - } - } - return false; + return check_java_type(type, "java.lang.StringBuilder"); } /*******************************************************************\ @@ -114,6 +120,7 @@ bool string_refine_preprocesst::is_java_string_builder_pointer_type( } return false; } + /*******************************************************************\ Function: string_refine_preprocesst::is_java_char_sequence_type @@ -125,16 +132,67 @@ Function: string_refine_preprocesst::is_java_char_sequence_type \*******************************************************************/ bool string_refine_preprocesst::is_java_char_sequence_type(const typet &type) +{ + return check_java_type(type, "java.lang.CharSequence"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_sequence_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of a pointer + to a java char sequence + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_sequence_pointer_type( + const typet &type) { if(type.id()==ID_pointer) { const pointer_typet &pt=to_pointer_type(type); const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.CharSequence"; - } + return is_java_char_sequence_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_array_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java char array + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_array_type(const typet &type) +{ + return check_java_type(type, "array[char]"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_array_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of a pointer + to a java char array + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_array_pointer_type( + const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_char_array_type(subtype); } return false; } @@ -217,6 +275,32 @@ void string_refine_preprocesst::declare_function( /*******************************************************************\ +Function: string_refine_preprocesst::get_data_and_length_type_of_char_array + + Inputs: an expression, a reference to a data type and a reference to a + length type + + Purpose: assuming the expression is a char array, figure out what + the types for length and data are and put them into the references + given as argument + +\*******************************************************************/ + +void string_refine_preprocesst::get_data_and_length_type_of_char_array( + const exprt &expr, typet &data_type, typet &length_type) +{ + typet object_type=ns.follow(expr.type()); + assert(object_type.id()==ID_struct); + const struct_typet &struct_type=to_struct_type(object_type); + for(auto component : struct_type.components()) + if(component.get_name()=="length") + length_type=component.type(); + else if(component.get_name()=="data") + data_type=component.type(); +} + +/*******************************************************************\ + Function: string_refine_preprocesst::get_data_and_length_type_of_string Inputs: an expression, a reference to a data type and a reference to a @@ -232,7 +316,8 @@ void string_refine_preprocesst::get_data_and_length_type_of_string( const exprt &expr, typet &data_type, typet &length_type) { assert(is_java_string_type(expr.type()) || - is_java_string_builder_type(expr.type())); + is_java_string_builder_type(expr.type()) || + is_java_char_sequence_type(expr.type())); typet object_type=ns.follow(expr.type()); const struct_typet &struct_type=to_struct_type(object_type); for(const auto &component : struct_type.components()) @@ -263,44 +348,43 @@ exprt string_refine_preprocesst::make_cprover_string_assign( const exprt &rhs, const source_locationt &location) { - if(is_java_string_pointer_type(rhs.type())) + if(implements_java_char_sequence(rhs.type())) { - auto pair=java_to_cprover_strings.insert( - std::pair(rhs, nil_exprt())); - - if(pair.second) - { - // We do the following assignments: - // cprover_string_array = *(rhs->data) - // cprover_string = { rhs->length; cprover_string_array } - - dereference_exprt deref(rhs, rhs.type().subtype()); - - typet data_type, length_type; - get_data_and_length_type_of_string(deref, data_type, length_type); - member_exprt length(deref, "length", length_type); - symbol_exprt array_lhs=fresh_array(data_type.subtype(), location); - - // string expression for the rhs of the second assignment - string_exprt new_rhs( - length, array_lhs, refined_string_typet(length_type, data_type)); - - member_exprt data(deref, "data", data_type); - dereference_exprt deref_data(data, data_type.subtype()); - symbol_exprt lhs=fresh_string(new_rhs.type(), location); - - std::list assignments; - assignments.emplace_back(array_lhs, deref_data); - assignments.emplace_back(lhs, new_rhs); - insert_assignments( - goto_program, target, target->function, location, assignments); - target=goto_program.insert_after(target); - pair.first->second=lhs; - } - return pair.first->second; + // We do the following assignments: + // 1 length= *(rhs->length) + // 2 cprover_string_array = *(rhs->data) + // 3 cprover_string = { length; cprover_string_array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + typet data_type, length_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + std::list assignments; + + // 1) cprover_string_length= *(rhs->length) + member_exprt length(deref, "length", length_type); + + // 2) cprover_string_array = *(rhs->data) + symbol_exprt array_lhs=fresh_array(data_type.subtype(), location); + member_exprt data(deref, "data", data_type); + dereference_exprt deref_data(data, data_type.subtype()); + assignments.emplace_back(array_lhs, deref_data); + + // 3) cprover_string = { cprover_string_length; cprover_string_array } + // This assignment is useful for finding witnessing strings for counter + // examples + refined_string_typet ref_type(length_type, java_char_type()); + string_exprt new_rhs(length, array_lhs, ref_type); + + symbol_exprt lhs=fresh_string(new_rhs.type(), location); + assignments.emplace_back(lhs, new_rhs); + + insert_assignments( + goto_program, target, target->function, location, assignments); + target=goto_program.insert_after(target); + return new_rhs; } else if(rhs.id()==ID_typecast && - is_java_string_pointer_type(rhs.op0().type())) + implements_java_char_sequence(rhs.op0().type())) { exprt new_rhs=make_cprover_string_assign( goto_program, target, rhs.op0(), location); @@ -312,6 +396,75 @@ exprt string_refine_preprocesst::make_cprover_string_assign( /*******************************************************************\ +Function: string_refine_preprocesst::make_cprover_char_array_assign + + Inputs: a goto_program, a position in this program, an expression of + type char array pointer and a location + + Outputs: a string expression + + Purpose: Introduce a temporary variable for cprover strings; + returns the cprover_string corresponding to rhs + +\*******************************************************************/ + +string_exprt string_refine_preprocesst::make_cprover_char_array_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location) +{ + assert(is_java_char_array_pointer_type(rhs.type())); + + // We do the following assignments: + // deref=*(rhs->data) + // array= typecast(&deref); + // string={ rhs->length; array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + typet length_type, data_type; + get_data_and_length_type_of_char_array(deref, data_type, length_type); + assert(data_type.id()==ID_pointer); + typet char_type=to_pointer_type(data_type).subtype(); + + refined_string_typet ref_type(length_type, java_char_type()); + typet content_type=ref_type.get_content_type(); + std::list assignments; + + // deref=*(rhs->data) + member_exprt array_rhs(deref, "data", data_type); + dereference_exprt deref_array(array_rhs, data_type.subtype()); + symbolt sym_lhs_deref=get_fresh_aux_symbol( + data_type.subtype(), + "char_array_assign$deref", + "char_array_assign$deref", + location, + ID_java, + symbol_table); + symbol_exprt lhs_deref=sym_lhs_deref.symbol_expr(); + assignments.emplace_back(lhs_deref, deref_array); + + // array=convert_pointer_to_char_array(*rhs->data) + declare_function(ID_cprover_string_array_of_char_pointer_func, content_type); + function_application_exprt fun_app(symbol_exprt( + ID_cprover_string_array_of_char_pointer_func), content_type); + fun_app.arguments().push_back(deref_array); + symbol_exprt array=fresh_array(content_type, location); + assignments.emplace_back(array, fun_app); + + // string={ rhs->length; string_array } + string_exprt new_rhs(get_length(deref, length_type), array, ref_type); + symbol_exprt lhs=fresh_string(ref_type, location); + assignments.emplace_back(lhs, new_rhs); + + insert_assignments( + goto_program, target, target->function, location, assignments); + target=goto_program.insert_after(target); + return new_rhs; +} + +/*******************************************************************\ + Function: string_refine_preprocesst::make_normal_assign Inputs: a goto_program, a position in this program, an expression lhs, @@ -336,27 +489,19 @@ void string_refine_preprocesst::make_normal_assign( const source_locationt &location, const std::string &signature) { - if(function_name==ID_cprover_string_copy_func) - { - assert(!arguments.empty()); - make_string_copy(goto_program, target, lhs, arguments[0], location); - } - else - { - function_application_exprt rhs( - symbol_exprt(function_name), function_type.return_type()); - rhs.add_source_location()=location; - declare_function(function_name, function_type); + function_application_exprt rhs( + symbol_exprt(function_name), function_type.return_type()); + rhs.add_source_location()=location; + declare_function(function_name, function_type); - exprt::operandst processed_arguments=process_arguments( - goto_program, target, arguments, location, signature); - rhs.arguments()=processed_arguments; + exprt::operandst processed_arguments=process_arguments( + goto_program, target, arguments, location, signature); + rhs.arguments()=processed_arguments; - code_assignt assignment(lhs, rhs); - assignment.add_source_location()=location; - target->make_assignment(); - target->code=assignment; - } + code_assignt assignment(lhs, rhs); + assignment.add_source_location()=location; + target->make_assignment(); + target->code=assignment; } /*******************************************************************\ @@ -420,7 +565,7 @@ void string_refine_preprocesst::make_string_assign( const source_locationt &location, const std::string &signature) { - assert(is_java_string_pointer_type(function_type.return_type())); + assert(implements_java_char_sequence(function_type.return_type())); dereference_exprt deref(lhs, lhs.type().subtype()); typet object_type=ns.follow(deref.type()); exprt object_size=size_of_expr(object_type, ns); @@ -454,98 +599,22 @@ void string_refine_preprocesst::make_string_assign( malloc_expr.type()=pointer_typet(object_type); malloc_expr.add_source_location()=location; + refined_string_typet ref_type(length_type, data_type.subtype().subtype()); + string_exprt str(tmp_length, tmp_array, ref_type); + symbol_exprt cprover_string_sym=fresh_string(ref_type, location); + std::list assigns; assigns.emplace_back(lhs, malloc_expr); assigns.emplace_back(tmp_length, rhs_length); assigns.emplace_back(lhs_length, tmp_length); assigns.emplace_back(tmp_array, rhs_data); + assigns.emplace_back(cprover_string_sym, str); assigns.emplace_back(lhs_data, address_of_exprt(tmp_array)); insert_assignments(goto_program, target, target->function, location, assigns); } /*******************************************************************\ -Function: string_refine_preprocesst::make_assign - - Inputs: a goto_program, a position in this program, an expression lhs, - a function type, a function name, a vector of arguments, a location - and a signature - - Purpose: assign the result of the function application to lhs, - in case the function type is string, it does a special assignment - using `make_string_assign` - -\*******************************************************************/ - -void string_refine_preprocesst::make_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &lhs, - const code_typet &function_type, - const irep_idt &function_name, - const exprt::operandst &arg, - const source_locationt &loc, - const std::string &sig) -{ - if(is_java_string_pointer_type(function_type.return_type())) - make_string_assign( - goto_program, target, lhs, function_type, function_name, arg, loc, sig); - else - make_normal_assign( - goto_program, target, lhs, function_type, function_name, arg, loc, sig); -} - -/*******************************************************************\ - -Function: string_refine_preprocesst::make_string_copy - - Inputs: a goto_program, a position in this program, a lhs expression, - an argument expression and a location - - Outputs: an expression - - Purpose: replace the current instruction by: - > lhs->length=argument->length - > tmp_data=*(argument->data) - > lhs->data=&tmp_data - -\*******************************************************************/ - -void string_refine_preprocesst::make_string_copy( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &lhs, - const exprt &argument, - const source_locationt &location) -{ - // TODO : treat CharSequence and StringBuffer - assert(is_java_string_pointer_type(lhs.type()) || - is_java_string_builder_pointer_type(lhs.type())); - exprt deref=dereference_exprt(lhs, lhs.type().subtype()); - - typet length_type, data_type; - get_data_and_length_type_of_string(deref, data_type, length_type); - - dereference_exprt deref_arg(argument, argument.type().subtype()); - std::list assignments; - - exprt lhs_length=get_length(deref, length_type); - exprt rhs_length=get_length(deref_arg, length_type); - assignments.emplace_back(lhs_length, rhs_length); - - symbol_exprt tmp_data=fresh_array(data_type.subtype(), location); - exprt rhs_data=get_data(deref_arg, data_type); - exprt lhs_data=get_data(deref, data_type); - assignments.emplace_back( - tmp_data, dereference_exprt(rhs_data, data_type.subtype())); - assignments.emplace_back(lhs_data, address_of_exprt(tmp_data)); - - insert_assignments( - goto_program, target, target->function, location, assignments); -} - -/*******************************************************************\ - Function: string_refine_preprocesst::make_string_function Inputs: a position in a goto program, a function name, an expression lhs, @@ -566,7 +635,36 @@ void string_refine_preprocesst::make_string_function( const source_locationt &location, const std::string &signature) { - if(is_java_string_pointer_type(function_type.return_type())) + if(signature.length()>0) + { + if(signature.back()=='S') + { + code_typet ft=function_type; + ft.return_type()=jls_ptr; + typecast_exprt lhs2(lhs, jls_ptr); + + make_string_assign( + goto_program, + target, + lhs2, + ft, + function_name, + arguments, + location, + signature); + } + else + make_normal_assign( + goto_program, + target, + lhs, + function_type, + function_name, + arguments, + location, + signature); + } + else if(implements_java_char_sequence(function_type.return_type())) make_string_assign( goto_program, target, @@ -597,9 +695,11 @@ Function: string_refine_preprocesst::make_string_function Purpose: at the current position replace `lhs=s.some_function(x,...)` by `lhs=function_name(s,x,...)`; option `assign_first_arg` uses `s` instead of `lhs` in the resulting - expression; + expression, Warning : it assumes that `s` is string-like option `skip_first_arg`, removes `s` from the arguments, ie `x` is - the first one + the first one; + arguments that are string (TODO: and char array) are replaced + by string_exprt \*******************************************************************/ @@ -635,11 +735,17 @@ void string_refine_preprocesst::make_string_function( new_type.parameters().push_back(function_type.parameters()[i]); } + std::string new_sig=signature; exprt lhs; if(assign_first_arg) { assert(!function_call.arguments().empty()); lhs=function_call.arguments()[0]; + std::size_t size=function_call.arguments().size(); + if(signature.length()<=size) + new_sig.resize(size+1, '_'); + + new_sig.replace(size, 1, "S"); } else lhs=function_call.lhs(); @@ -650,7 +756,7 @@ void string_refine_preprocesst::make_string_function( new_type.return_type()=lhs.type(); make_string_function( - goto_program, target, lhs, new_type, function_name, args, loc, signature); + goto_program, target, lhs, new_type, function_name, args, loc, new_sig); } /*******************************************************************\ @@ -680,9 +786,11 @@ Function: string_refine_preprocesst::make_string_function_side_effect Inputs: a position in a goto program and a function name - Purpose: at the current position, replace `r=s.some_function(x,...)` - by `s=function_name(s,x,...)` and add a correspondance from r - to s in the `string_builders` map + Purpose: at the current position, replace `r=s.some_function(x,...)` by + > tmp=function_name(x,...) + > s->data=tmp.data + > s->length=tmp.length + > r=s \*******************************************************************/ @@ -692,11 +800,48 @@ void string_refine_preprocesst::make_string_function_side_effect( const irep_idt &function_name, const std::string &signature) { - const code_function_callt function_call=to_code_function_call(target->code); - assert(!function_call.arguments().empty()); - string_builders[function_call.lhs()]=function_call.arguments()[0]; - make_string_function( - goto_program, target, function_name, signature, true, false); + code_function_callt function_call=to_code_function_call(target->code); + source_locationt loc=function_call.source_location(); + std::list assignments; + exprt lhs=function_call.lhs(); + exprt s=function_call.arguments()[0]; + code_typet function_type=to_code_type(function_call.type()); + + function_type.return_type()=s.type(); + + if(lhs.is_not_nil()) + { + symbol_exprt tmp_string=fresh_string(lhs.type(), loc); + + make_string_assign( + goto_program, + target, + tmp_string, + function_type, + function_name, + function_call.arguments(), + loc, + signature); + dereference_exprt deref_lhs(s, s.type().subtype()); + typet data_type, length_type; + get_data_and_length_type_of_string(deref_lhs, data_type, length_type); + member_exprt lhs_data(deref_lhs, "data", data_type); + member_exprt lhs_length(deref_lhs, "length", length_type); + dereference_exprt deref_rhs(tmp_string, s.type().subtype()); + member_exprt rhs_data(deref_rhs, "data", data_type); + member_exprt rhs_length(deref_rhs, "length", length_type); + assignments.emplace_back(lhs_length, rhs_length); + assignments.emplace_back(lhs_data, rhs_data); + assignments.emplace_back(lhs, s); + target=goto_program.insert_after(target); + insert_assignments( + goto_program, target, target->function, loc, assignments); + } + else + { + make_string_function( + goto_program, target, function_name, signature, true, false); + } } /*******************************************************************\ @@ -735,6 +880,7 @@ Function: string_refine_preprocesst::make_to_char_array_function Inputs: a goto program and a position in that goto program Purpose: at the given position replace `return_tmp0=s.toCharArray()` with: + > return_tmp0 = malloc(array[char]); > return_tmp0->data=&((s->data)[0]) > return_tmp0->length=s->length @@ -744,16 +890,36 @@ void string_refine_preprocesst::make_to_char_array_function( goto_programt &goto_program, goto_programt::targett &target) { const code_function_callt &function_call=to_code_function_call(target->code); + source_locationt location=function_call.source_location(); - assert(!function_call.arguments().empty()); + assert(function_call.arguments().size()>=1); const exprt &string_argument=function_call.arguments()[0]; assert(is_java_string_pointer_type(string_argument.type())); + typet deref_type=function_call.lhs().type().subtype(); + const exprt &lhs=function_call.lhs(); + dereference_exprt deref_lhs(lhs, deref_type); + dereference_exprt deref(string_argument, string_argument.type().subtype()); typet length_type, data_type; get_data_and_length_type_of_string(deref, data_type, length_type); std::list assignments; + // lhs=malloc(array[char]) + typet object_type=ns.follow(deref_type); + exprt object_size=size_of_expr(object_type, ns); + + if(object_size.is_nil()) + debug() << "string_refine_preprocesst::make_to_char_array_function " + << "got nil object_size" << eom; + + side_effect_exprt malloc_expr(ID_malloc); + malloc_expr.copy_to_operands(object_size); + malloc_expr.type()=pointer_typet(object_type); + malloc_expr.add_source_location()=location; + assignments.emplace_back(lhs, malloc_expr); + + // &((s->data)[0]) exprt rhs_data=get_data(deref, data_type); dereference_exprt rhs_array(rhs_data, data_type.subtype()); @@ -762,8 +928,6 @@ void string_refine_preprocesst::make_to_char_array_function( address_of_exprt rhs_pointer(first_element); // return_tmp0->data=&((s->data)[0]) - typet deref_type=function_call.lhs().type().subtype(); - dereference_exprt deref_lhs(function_call.lhs(), deref_type); exprt lhs_data=get_data(deref_lhs, data_type); assignments.emplace_back(lhs_data, rhs_pointer); @@ -771,178 +935,8 @@ void string_refine_preprocesst::make_to_char_array_function( exprt rhs_length=get_length(deref, length_type); exprt lhs_length=get_length(deref_lhs, length_type); assignments.emplace_back(lhs_length, rhs_length); - source_locationt location=target->source_location; - insert_assignments( - goto_program, target, target->function, location, assignments); -} - -/*******************************************************************\ - -Function: string_refine_preprocesst::make_cprover_char_array_assign - - Inputs: a goto_program, a position in this program, an expression and a - location - - Outputs: a char array expression (not a pointer) - - Purpose: Introduce a temporary variable for cprover strings; - returns the cprover_string corresponding to rhs - -\*******************************************************************/ - -exprt string_refine_preprocesst::make_cprover_char_array_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &rhs, - const source_locationt &location) -{ - // TODO : add an assertion on the type of rhs - - // We do the following assignments: - // cprover_string_array = rhs.data - // cprover_string = { rhs.length; cprover_string_array } - - // string expression for the rhs of the second assignment - string_exprt new_rhs(java_char_type()); - - typet data_type=new_rhs.content().type(); - typet length_type=java_int_type(); - - symbol_exprt array_lhs=fresh_array(data_type, location); - exprt array_rhs=get_data(rhs, new_rhs.content().type()); - symbol_exprt lhs=fresh_string(new_rhs.type(), location); - new_rhs.length()=get_length(rhs, length_type); - new_rhs.content()=array_lhs; - - std::list assignments; - assignments.emplace_back(array_lhs, array_rhs); - assignments.emplace_back(lhs, new_rhs); insert_assignments( goto_program, target, target->function, location, assignments); - target=goto_program.insert_after(target); - return lhs; -} - -/*******************************************************************\ - -Function: string_refine_preprocesst::make_char_array_function - - Inputs: a position in a goto program, a function name, two Boolean options, - and the index of the char array argument in the function - - Purpose: at the given position replace - `lhs=s.some_function(...,char_array,...)` by - > cprover_string = { char_array->length, *char_array } - > tmp_string=function_name(s, cprover_string, ...) - option `assign_first_arg` uses `s` instead of `lhs` in the second - assignment; - option `skip_first_arg`, removes `s` from the arguments, ie `x` is - the first one; - argument index gives the index of the argument containing char_array - -\*******************************************************************/ - -void string_refine_preprocesst::make_char_array_function( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature, - std::size_t index, - bool assign_first_arg, - bool skip_first_arg) -{ - code_function_callt &function_call=to_code_function_call(target->code); - code_typet function_type=to_code_type(function_call.function().type()); - code_typet new_function_type; - const source_locationt &location=function_call.source_location(); - assert(!function_call.arguments().size()>index); - const std::vector &args=function_call.arguments(); - std::vector new_args; - - exprt lhs; - if(assign_first_arg) - { - assert(!function_call.arguments().empty()); - lhs=function_call.arguments()[0]; - else - lhs=function_call.lhs(); - - if(lhs.id()==ID_typecast) - lhs=to_typecast_expr(lhs).op(); - - exprt char_array=dereference_exprt( - function_call.arguments()[index], - function_call.arguments()[index].type().subtype()); - exprt string=make_cprover_char_array_assign( - goto_program, target, char_array, location); - - std::size_t start_index=skip_first_arg?1:0; - for(std::size_t i=start_index; icode); - assert(!function_call.arguments().empty()); - string_builders[function_call.lhs()]=function_call.arguments()[0]; } /*******************************************************************\ @@ -974,22 +968,20 @@ exprt::operandst string_refine_preprocesst::process_arguments( for(std::size_t i=0; isecond); - else + if(iis_function_call()) { code_function_callt &function_call=to_code_function_call(target->code); - for(auto &arg : function_call.arguments()) - { - auto sb_it=string_builders.find(arg); - if(sb_it!=string_builders.end()) - arg=sb_it->second; - } if(function_call.function().id()==ID_symbol) { @@ -1070,21 +1056,6 @@ void string_refine_preprocesst::replace_string_calls( make_string_function_call( goto_program, target, it->second, signature); - it=string_of_char_array_functions.find(function_id); - if(it!=string_of_char_array_functions.end()) - make_char_array_function( - goto_program, target, it->second, signature, 0); - - it=string_of_char_array_function_calls.find(function_id); - if(it!=string_of_char_array_function_calls.end()) - make_char_array_function_call( - goto_program, target, it->second, signature); - - it=side_effect_char_array_functions.find(function_id); - if(it!=side_effect_char_array_functions.end()) - make_char_array_side_effect( - goto_program, target, it->second, signature); - if(function_id==irep_idt("java::java.lang.String.toCharArray:()[C")) make_to_char_array_function(goto_program, target); } @@ -1299,12 +1270,18 @@ void string_refine_preprocesst::initialize_string_function_table() ID_cprover_string_set_length_func; - side_effect_char_array_functions + + side_effect_functions + ["java::java.lang.StringBuilder.append:([C)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_func; + side_effect_functions ["java::java.lang.StringBuilder.insert:(I[CII)Ljava/lang/StringBuilder;"]= - ID_cprover_string_insert_char_array_func; - side_effect_char_array_functions + ID_cprover_string_insert_func; + side_effect_functions ["java::java.lang.StringBuilder.insert:(I[C)Ljava/lang/StringBuilder;"]= - ID_cprover_string_insert_char_array_func; + ID_cprover_string_insert_func; + // TODO clean irep ids from insert_char_array etc... string_function_calls ["java::java.lang.String.:(Ljava/lang/String;)V"]= @@ -1320,23 +1297,23 @@ void string_refine_preprocesst::initialize_string_function_table() string_function_calls["java::java.lang.StringBuilder.:()V"]= ID_cprover_string_empty_string_func; - string_of_char_array_function_calls["java::java.lang.String.:([C)V"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_function_calls["java::java.lang.String.:([CII)V"]= - ID_cprover_string_of_char_array_func; + string_function_calls["java::java.lang.String.:([C)V"]= + ID_cprover_string_copy_func; + string_function_calls["java::java.lang.String.:([CII)V"]= + ID_cprover_string_copy_func; - string_of_char_array_functions + string_functions ["java::java.lang.String.valueOf:([CII)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_functions + ID_cprover_string_copy_func; + string_functions ["java::java.lang.String.valueOf:([C)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_functions + ID_cprover_string_copy_func; + string_functions ["java::java.lang.String.copyValueOf:([CII)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_functions + ID_cprover_string_copy_func; + string_functions ["java::java.lang.String.copyValueOf:([C)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; + ID_cprover_string_copy_func; c_string_functions["__CPROVER_uninterpreted_string_literal_func"]= ID_cprover_string_literal_func; @@ -1370,9 +1347,23 @@ void string_refine_preprocesst::initialize_string_function_table() ID_cprover_string_of_int_func; signatures["java::java.lang.String.equals:(Ljava/lang/Object;)Z"]="SSZ"; - signatures - ["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= - "SSZ"; + signatures["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= + "SSZ"; + signatures["java::java.lang.StringBuilder.insert:(IZ)" + "Ljava/lang/StringBuilder;"]="SIZS"; + signatures["java::java.lang.StringBuilder.insert:(IJ)" + "Ljava/lang/StringBuilder;"]="SIJS"; + signatures["java::java.lang.StringBuilder.insert:(II)" + "Ljava/lang/StringBuilder;"]="SIIS"; + signatures["java::java.lang.StringBuilder.insert:(IC)" + "Ljava/lang/StringBuilder;"]="SICS"; + signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]="SISS"; + signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]="SISS"; + signatures["java::java.lang.StringBuilder.insert:(I[C)" + "Ljava/lang/StringBuilder;"]="SI[S"; + signatures["java::java.lang.String.intern:()Ljava/lang/String;"]="SV"; } /*******************************************************************\ diff --git a/src/goto-programs/string_refine_preprocess.h b/src/goto-programs/string_refine_preprocess.h index 613d0161926..5ae398b757a 100644 --- a/src/goto-programs/string_refine_preprocess.h +++ b/src/goto-programs/string_refine_preprocess.h @@ -14,6 +14,7 @@ Date: September 2016 #include #include +#include class string_refine_preprocesst:public messaget { @@ -29,28 +30,21 @@ class string_refine_preprocesst:public messaget typedef std::unordered_map id_mapt; typedef std::unordered_map expr_mapt; - // String builders maps the different names of a same StringBuilder object - // to a unique expression. - expr_mapt string_builders; - // Map name of Java string functions to there equivalent in the solver id_mapt side_effect_functions; id_mapt string_functions; id_mapt c_string_functions; id_mapt string_function_calls; - id_mapt string_of_char_array_functions; - id_mapt string_of_char_array_function_calls; - id_mapt side_effect_char_array_functions; std::unordered_map signatures; - expr_mapt hidden_strings; - expr_mapt java_to_cprover_strings; // unique id for each newly created symbols int next_symbol_id; void initialize_string_function_table(); + static bool check_java_type(const typet &type, const std::string &tag); + static bool is_java_string_pointer_type(const typet &type); static bool is_java_string_type(const typet &type); @@ -61,6 +55,20 @@ class string_refine_preprocesst:public messaget static bool is_java_char_sequence_type(const typet &type); + static bool is_java_char_sequence_pointer_type(const typet &type); + + static bool is_java_char_array_type(const typet &type); + + static bool is_java_char_array_pointer_type(const typet &type); + + static bool implements_java_char_sequence(const typet &type) + { + return + is_java_char_sequence_pointer_type(type) || + is_java_string_builder_pointer_type(type) || + is_java_string_pointer_type(type); + } + symbol_exprt fresh_array( const typet &type, const source_locationt &location); symbol_exprt fresh_string( @@ -110,6 +118,9 @@ class string_refine_preprocesst:public messaget void get_data_and_length_type_of_string( const exprt &expr, typet &data_type, typet &length_type); + void get_data_and_length_type_of_char_array( + const exprt &expr, typet &data_type, typet &length_type); + function_application_exprt build_function_application( const irep_idt &function_name, const typet &type, @@ -136,27 +147,16 @@ class string_refine_preprocesst:public messaget const source_locationt &location, const std::string &signature); - void make_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &lhs, - const code_typet &function_type, - const irep_idt &function_name, - const exprt::operandst &arg, - const source_locationt &loc, - const std::string &sig); - exprt make_cprover_string_assign( goto_programt &goto_program, goto_programt::targett &target, const exprt &rhs, const source_locationt &location); - void make_string_copy( + string_exprt make_cprover_char_array_assign( goto_programt &goto_program, goto_programt::targett &target, - const exprt &lhs, - const exprt &argument, + const exprt &rhs, const source_locationt &location); void make_string_function( @@ -192,33 +192,6 @@ class string_refine_preprocesst:public messaget void make_to_char_array_function( goto_programt &goto_program, goto_programt::targett &); - exprt make_cprover_char_array_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &rhs, - const source_locationt &location); - - void make_char_array_function( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature, - std::size_t index, - bool assign_first_arg=false, - bool skip_first_arg=false); - - void make_char_array_function_call( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature); - - void make_char_array_side_effect( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature); - void replace_string_calls(goto_functionst::function_mapt::iterator f_it); }; From bdbeaf586aa45a429b0fa390169514e5d2a3c7ad Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 28 Feb 2017 13:50:25 +0000 Subject: [PATCH 4/5] Many corrections in string refinement Overhaul of string solver contributions The code adding lemmas to the solver for each equality has been completely remastered. It has been simplified and should now cover all foreseable cases. Add overflow constraints on sums for string solver When creating sum expressions during string refinement, e.g. when adding string lengths, we shall add axioms to prevent the solver from finding values that actually come from an integer overflow. Therefore, we introduce a new method plus_exprt_with_overflow_check() that wraps around plus_exprt and generates the relevant axioms. Cache function applications Cache converted function applications to avoid re-generating the axioms if it has already been done for a given function application. This seems to happen for function applications that are in an assertion, and thus are treated separately by convert_bitvector. Remove string_refinement::convert_symbol: the behaviour of this function was to convert java strings to strings of symbols. In hindsight, this was not a good idea, as we still refer to the actual java string fields in some cases, e.g. to read its length. Replace add_axioms_for_string_expr In all add_axioms_... methods, we replace all calls to add_axioms_for_string_expr by the newly created function get_string_expr, which simply returns a string_exprt when we give it an refined expr (such as a string_exprt or a symbol). Update doc in add_axioms_for_index_string Simplify constraint for is_prefix: apply distribution law and update (and refactor) documentation. Remove set_string_symbol_equal_to_expr() Removed mentions of java string type in the string solver Removing mentions of is_c_string_type To use the string solver in C, we should use a struct type with tag __CPROVER_refined_string_type Keep non-string axioms in a list Instead of giving axioms to super::boolbv_set_equality_to_true, which may contain un-substituted symbols, we keep them in a list and substitute them in dec_solve() before giving them to the function. Cleaning the add_axioms_for_string_expr method and renamed it to add_axioms_for_refined_string to signify that it should only be called on expression of type refined string. An assertion was added at the beginning to ensure that. Better get_array and string_of_array functions. Cleaned the implementation of get_array and factorized part of it. Made string_of_array directly take a array_exprt, which can be obtained from get_array. Improved string_of_array for non printable characters Resolve symbol to char arrays in non-string axioms. We introduce a mapping that expresses the maximal traversal of symbols of char array type. We use that map to, either obtain a char array from a symbol, either get a unique symbol the aliases equivalence class. This map is used to replace all symbols of char array type in the axioms that are added using the parent method: supert::boolbv_set_equality_to_true. Defining several intermediary functions for check_axioms and cleaning. Check axioms is quite complicated so we splitted it in several parts. Avoid using same universal variable name in string comparison function. Corrected test in index_of as characters could be signed Corrected return type retrieval in from_float functions. The return type should be given as argument of the helper function add_axioms_from_float. Fixed type of contains in constraint generation Introduce string_refinementt::set_to. We now override the set_to() method instead of set_equality_to_true(). This solves a problem where many lemmas were being dropped. Handle array_of in set_char_array_equality. Java assignments such as char[] str=new char[10] involves assigning the char array to 0, which is done using the array_of operator. This operator is now handled when found on the rhs of a char array assignment. Optimized string hash code and intern functions to only look at seen strings. We only compare the argument to the strings on which hash_code (resp. intern) was already called. Add missing override keyword in declarations Integrate strings into symbol resolution. We now use for strings the mechanism that was made for resolving symbols to char arrays. As a result, the symbol_to_string map has been removed. We introduce an unresolved_symbol map to associate symbols to string expressions that were introduced during constraint generation. Preventing return of nil exprt in sum_over_map Enforce -1 for witnesses when strings are unequal. This makes it easier to see when models for string have different length. Correct array index in get_array Correct a bad array access that caused a segmentation fault on the java_delete test. Adding option to concretize result and get method in string solver The option to concretize makes sure that the model that we get in the end is correct. Overiding the get method is necessary to get the actual valuation of string gotten by the solver. This two additions makes the trace generation for program with strings more likely to be actual traces of the program. Avoid adding axioms for copy Corrected `get` to not replace strings Simplifying lemmas before handing them to the solver This seems to improve performances Ignoring char array that are not symbol or constants Avoid creating new length variable for concat Adding constraint on positive length for refined strings Using get method of the parent class for the size Add function to fill the found_length map Signature for get_array should be constant Corrected overflow problem when integer get to the max size Removing overflow check in universal constraint Enforcing witness -1 when length is not sufficient in suffix Corrected index variable which should be universally quantified Factorizing addition to index set and more debug information Avoiding generating a fresh string for the empty string Enabling string solver to treat more cases and more debug infos Conversion from char pointer to array in the solver Raise a warning if found length is negative Ensure the arguments of parseInt are of the right format For now we add axioms asking for the argument to have the right format but ultimately we should raise an exception when it is not the case (which is not done right now). Corrects the specification of int to string conversion Some problems where encountered for strings with the maximal size possible for an int, which could cause an overflow. Disallow + sign in string from int Java will not add the sign for positive numbers Use get instead of current_model in check_axioms Streamline code of check_axioms() by calling get() insteand of relying on the 'current_model' variable. get() has been adapted to convert array-lists into with expressions, the former not being handled by the string solver. --- src/solvers/refinement/string_constraint.h | 1 - .../refinement/string_constraint_generator.h | 45 +- ...tring_constraint_generator_code_points.cpp | 18 +- ...string_constraint_generator_comparison.cpp | 102 +- .../string_constraint_generator_concat.cpp | 31 +- .../string_constraint_generator_constants.cpp | 9 +- .../string_constraint_generator_indexof.cpp | 27 +- .../string_constraint_generator_insert.cpp | 24 +- .../string_constraint_generator_main.cpp | 422 +++++--- .../string_constraint_generator_testing.cpp | 46 +- ...ng_constraint_generator_transformation.cpp | 39 +- .../string_constraint_generator_valueof.cpp | 101 +- src/solvers/refinement/string_refinement.cpp | 917 ++++++++++++++---- src/solvers/refinement/string_refinement.h | 66 +- 14 files changed, 1323 insertions(+), 525 deletions(-) diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h index 4b8d1229e09..1bb460349b7 100644 --- a/src/solvers/refinement/string_constraint.h +++ b/src/solvers/refinement/string_constraint.h @@ -48,7 +48,6 @@ class string_constraintt: public exprt return operands()[4]; } - private: string_constraintt(); diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index ed898cb9a18..12447e0cf04 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -14,6 +14,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H #include +#include #include #include @@ -65,21 +66,20 @@ class string_constraint_generatort symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type); symbol_exprt fresh_boolean(const irep_idt &prefix); string_exprt fresh_string(const refined_string_typet &type); + string_exprt get_string_expr(const exprt &expr); + string_exprt convert_java_string_to_string_exprt( + const exprt &underlying); + plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2); - // We maintain a map from symbols to strings. - std::map symbol_to_string; + // Maps unresolved symbols to the string_exprt that was created for them + std::map unresolved_symbols; - string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym); - void assign_to_symbol( - const symbol_exprt &sym, const string_exprt &expr) - { - symbol_to_string[sym.get_identifier()]=expr; - } + string_exprt find_or_add_string_of_symbol( + const symbol_exprt &sym, + const refined_string_typet &ref_type); - string_exprt add_axioms_for_string_expr(const exprt &expr); - void set_string_symbol_equal_to_expr( - const symbol_exprt &sym, const exprt &str); + string_exprt add_axioms_for_refined_string(const exprt &expr); exprt add_axioms_for_function_application( const function_application_exprt &expr); @@ -96,6 +96,8 @@ class string_constraint_generatort const std::size_t MAX_FLOAT_LENGTH=15; const std::size_t MAX_DOUBLE_LENGTH=30; + std::map function_application_cache; + static irep_idt extract_java_string(const symbol_exprt &s); exprt axiom_for_is_positive_index(const exprt &x); @@ -117,6 +119,9 @@ class string_constraint_generatort // The specification is partial: the actual value is not actually computed // but we ensure that hash codes of equal strings are equal. exprt add_axioms_for_hash_code(const function_application_exprt &f); + // To each string on which hash_code was called we associate a symbol + // representing the return value of the hash_code function. + std::map hash_code_of_string; exprt add_axioms_for_is_empty(const function_application_exprt &f); exprt add_axioms_for_is_prefix( @@ -224,7 +229,9 @@ class string_constraint_generatort // the start for negative number string_exprt add_axioms_from_float(const function_application_exprt &f); string_exprt add_axioms_from_float( - const exprt &f, bool double_precision=false); + const exprt &f, + const refined_string_typet &ref_type, + bool double_precision); // Add axioms corresponding to the String.valueOf(D) java function // TODO: the specifications is only partial @@ -253,6 +260,7 @@ class string_constraint_generatort string_exprt add_axioms_for_code_point( const exprt &code_point, const refined_string_typet &ref_type); string_exprt add_axioms_for_java_char_array(const exprt &char_array); + exprt add_axioms_for_char_pointer(const function_application_exprt &fun); string_exprt add_axioms_for_if(const if_exprt &expr); exprt add_axioms_for_char_literal(const function_application_exprt &f); @@ -270,6 +278,8 @@ class string_constraint_generatort const function_application_exprt &f); exprt add_axioms_for_parse_int(const function_application_exprt &f); + exprt add_axioms_for_correct_number_format( + const string_exprt &str, std::size_t max_size=10); exprt add_axioms_for_to_char_array(const function_application_exprt &f); exprt add_axioms_for_compare_to(const function_application_exprt &f); @@ -278,6 +288,9 @@ class string_constraint_generatort // string pointers symbol_exprt add_axioms_for_intern(const function_application_exprt &f); + // Pool used for the intern method + std::map intern_of_string; + // Tells which language is used. C and Java are supported irep_idt mode; @@ -293,14 +306,8 @@ class string_constraint_generatort exprt int_of_hex_char(const exprt &chr) const; exprt is_high_surrogate(const exprt &chr) const; exprt is_low_surrogate(const exprt &chr) const; - static exprt character_equals_ignore_case( + exprt character_equals_ignore_case( exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z); - - // Pool used for the intern method - std::map pool; - - // Used to determine whether hashcode should be equal - std::map hash; }; #endif diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp index 7c035a0d3e4..16763dfc97c 100644 --- a/src/solvers/refinement/string_constraint_generator_code_points.cpp +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -160,17 +160,18 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at( { typet return_type=f.type(); assert(return_type.id()==ID_signedbv); - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); const exprt &pos=args(f, 2)[1]; symbol_exprt result=fresh_symbol("char", return_type); exprt index1=from_integer(1, str.length().type()); const exprt &char1=str[pos]; - const exprt &char2=str[plus_exprt(pos, index1)]; + const exprt &char2=str[plus_exprt_with_overflow_check(pos, index1)]; exprt char1_as_int=typecast_exprt(char1, return_type); exprt char2_as_int=typecast_exprt(char2, return_type); exprt pair=pair_value(char1_as_int, char2_as_int, return_type); - exprt is_low=is_low_surrogate(str[plus_exprt(pos, index1)]); + exprt is_low=is_low_surrogate( + str[plus_exprt_with_overflow_check(pos, index1)]); exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low); axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); @@ -199,7 +200,7 @@ exprt string_constraint_generatort::add_axioms_for_code_point_before( typet return_type=f.type(); assert(return_type.id()==ID_signedbv); symbol_exprt result=fresh_symbol("char", return_type); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); const exprt &char1= str[minus_exprt(args[1], from_integer(2, str.length().type()))]; @@ -234,7 +235,7 @@ Function: string_constraint_generatort::add_axioms_for_code_point_count exprt string_constraint_generatort::add_axioms_for_code_point_count( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const exprt &begin=args(f, 3)[1]; const exprt &end=args(f, 3)[2]; const typet &return_type=f.type(); @@ -265,14 +266,15 @@ Function: string_constraint_generatort::add_axioms_for_offset_by_code_point exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const exprt &index=args(f, 3)[1]; const exprt &offset=args(f, 3)[2]; const typet &return_type=f.type(); symbol_exprt result=fresh_symbol("offset_by_code_point", return_type); - exprt minimum=plus_exprt(index, offset); - exprt maximum=plus_exprt(index, plus_exprt(offset, offset)); + exprt minimum=plus_exprt_with_overflow_check(index, offset); + exprt maximum=plus_exprt_with_overflow_check( + index, plus_exprt_with_overflow_check(offset, offset)); axioms.push_back(binary_relation_exprt(result, ID_le, maximum)); axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); diff --git a/src/solvers/refinement/string_constraint_generator_comparison.cpp b/src/solvers/refinement/string_constraint_generator_comparison.cpp index 321282ee9c1..a51e2abe3cd 100644 --- a/src/solvers/refinement/string_constraint_generator_comparison.cpp +++ b/src/solvers/refinement/string_constraint_generator_comparison.cpp @@ -18,7 +18,9 @@ Function: string_constraint_generatort::add_axioms_for_equals Outputs: a expression of Boolean type Purpose: add axioms stating that the result is true exactly when the strings - represented by the arguments are equal + represented by the arguments are equal. + the variable ending in `witness_unequal` is -1 if the length differs + or an index at which the strings are different \*******************************************************************/ @@ -29,8 +31,8 @@ exprt string_constraint_generatort::add_axioms_for_equals( symbol_exprt eq=fresh_boolean("equal"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); typet index_type=s1.length().type(); // We want to write: @@ -54,9 +56,10 @@ exprt string_constraint_generatort::add_axioms_for_equals( binary_relation_exprt(witness, ID_lt, s1.length()), binary_relation_exprt(witness, ID_ge, zero)); and_exprt witnessing(bound_witness, notequal_exprt(s1[witness], s2[witness])); - implies_exprt a3( - not_exprt(eq), - or_exprt(notequal_exprt(s1.length(), s2.length()), witnessing)); + and_exprt diff_length( + notequal_exprt(s1.length(), s2.length()), + equal_exprt(witness, from_integer(-1, index_type))); + implies_exprt a3(not_exprt(eq), or_exprt(diff_length, witnessing)); axioms.push_back(a3); return tc_eq; @@ -92,8 +95,13 @@ exprt string_constraint_generatort::character_equals_ignore_case( // p3 : (is_up2&&'a'-'A'+char2=char1) equal_exprt p1(char1, char2); minus_exprt diff=minus_exprt(char_a, char_A); - and_exprt p2(is_upper_case_1, equal_exprt(plus_exprt(diff, char1), char2)); - and_exprt p3(is_upper_case_2, equal_exprt(plus_exprt(diff, char2), char1)); + + // Overflow is not a problem here because is_upper_case conditions + // ensure that we are within a safe range. + and_exprt p2(is_upper_case_1, + equal_exprt(plus_exprt(diff, char1), char2)); + and_exprt p3(is_upper_case_2, + equal_exprt(plus_exprt(diff, char2), char1)); return or_exprt(or_exprt(p1, p2), p3); } @@ -116,8 +124,8 @@ exprt string_constraint_generatort::add_axioms_for_equals_ignore_case( symbol_exprt eq=fresh_boolean("equal_ignore_case"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); typet char_type=to_refined_string_type(s1.type()).get_char_type(); exprt char_a=constant_char('a', char_type); exprt char_A=constant_char('A', char_type); @@ -174,37 +182,35 @@ Function: string_constraint_generatort::add_axioms_for_hash_code exprt string_constraint_generatort::add_axioms_for_hash_code( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); typet return_type=f.type(); typet index_type=str.length().type(); - // initialisation of the missing pool variable - std::map::iterator it; - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(hash.find(it->second)==hash.end()) - hash[it->second]=fresh_symbol("hash", return_type); + auto pair=hash_code_of_string.insert( + std::make_pair(str, fresh_symbol("hash", return_type))); + exprt hash=pair.first->second; // for each string s. either: // c1: hash(str)=hash(s) // c2: |str|!=|s| - // c3: (|str|==|s| &&exists i<|s|. s[i]!=str[i]) + // c3: (|str|==|s| && exists i<|s|. s[i]!=str[i]) // WARNING: the specification may be incomplete - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + for(auto it : hash_code_of_string) { symbol_exprt i=fresh_exist_index("index_hash", index_type); - equal_exprt c1(hash[it->second], hash[str]); - not_exprt c2(equal_exprt(it->second.length(), str.length())); + equal_exprt c1(it.second, hash); + not_exprt c2(equal_exprt(it.first.length(), str.length())); and_exprt c3( - equal_exprt(it->second.length(), str.length()), + equal_exprt(it.first.length(), str.length()), and_exprt( - not_exprt(equal_exprt(str[i], it->second[i])), + not_exprt(equal_exprt(str[i], it.first[i])), and_exprt( str.axiom_for_is_strictly_longer_than(i), axiom_for_is_positive_index(i)))); axioms.push_back(or_exprt(c1, or_exprt(c2, c3))); } - return hash[str]; + return hash; } /*******************************************************************\ @@ -222,8 +228,8 @@ Function: string_constraint_generatort::add_axioms_for_compare_to exprt string_constraint_generatort::add_axioms_for_compare_to( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); const typet &return_type=f.type(); symbol_exprt res=fresh_symbol("compare_to", return_type); typet index_type=s1.length().type(); @@ -234,12 +240,12 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( // a1 : res==0 => |s1|=|s2| // a2 : forall i<|s1|. s1[i]==s2[i] // a3 : exists x. - // res!=0 ==> x> 0 && - // ((|s1| <= |s2| &&x<|s1|) || (|s1| >= |s2| &&x<|s2|) - // &&res=s1[x]-s2[x] ) - // || cond2: - // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) - // a4 : forall i s1[i]=s2[i] + // res!=0 ==> x > 0 + // && ((|s1| <= |s2| && x<|s1|) || (|s1| >= |s2| &&x<|s2|) + // && res=s1[x]-s2[x] ) + // || cond2: + // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) + // a4 : forall i' s1[i]=s2[i] assert(return_type.id()==ID_signedbv); @@ -282,7 +288,9 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( or_exprt(cond1, cond2))); axioms.push_back(a3); - string_constraintt a4(i, x, not_exprt(res_null), equal_exprt(s1[i], s2[i])); + symbol_exprt i2=fresh_univ_index("QA_compare_to", index_type); + string_constraintt a4( + i2, x, not_exprt(res_null), equal_exprt(s1[i2], s2[i2])); axioms.push_back(a4); return res; @@ -304,15 +312,15 @@ Function: string_constraint_generatort::add_axioms_for_intern symbol_exprt string_constraint_generatort::add_axioms_for_intern( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); + // For now we only enforce content equality and not pointer equality const typet &return_type=f.type(); + typet index_type=str.length().type(); - // initialisation of the missing pool variable - std::map::iterator it; - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(pool.find(it->second)==pool.end()) - pool[it->second]=fresh_symbol("pool", return_type); + auto pair=intern_of_string.insert( + std::make_pair(str, fresh_symbol("pool", return_type))); + symbol_exprt intern=pair.first->second; // intern(str)=s_0 || s_1 || ... // for each string s. @@ -320,30 +328,30 @@ symbol_exprt string_constraint_generatort::add_axioms_for_intern( // || (|str|==|s| &&exists i<|s|. s[i]!=str[i]) exprt disj=false_exprt(); - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + for(auto it : intern_of_string) disj=or_exprt( - disj, equal_exprt(pool[str], symbol_exprt(it->first, return_type))); + disj, equal_exprt(intern, it.second)); axioms.push_back(disj); // WARNING: the specification may be incomplete or incorrect - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(it->second!=str) + for(auto it : intern_of_string) + if(it.second!=str) { symbol_exprt i=fresh_exist_index("index_intern", index_type); axioms.push_back( or_exprt( - equal_exprt(pool[it->second], pool[str]), + equal_exprt(it.second, intern), or_exprt( - not_exprt(str.axiom_for_has_same_length_as(it->second)), + not_exprt(str.axiom_for_has_same_length_as(it.first)), and_exprt( - str.axiom_for_has_same_length_as(it->second), + str.axiom_for_has_same_length_as(it.first), and_exprt( - not_exprt(equal_exprt(str[i], it->second[i])), + not_exprt(equal_exprt(str[i], it.first[i])), and_exprt(str.axiom_for_is_strictly_longer_than(i), axiom_for_is_positive_index(i))))))); } - return pool[str]; + return intern; } diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index a7d9c4921c4..fb8e24d5320 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -35,9 +35,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( // a4 : forall i<|s1|. res[i]=s1[i] // a5 : forall i<|s2|. res[i+|s1|]=s2[i] - exprt a1=res.axiom_for_has_length( - plus_exprt(s1.length(), s2.length())); - axioms.push_back(a1); + res.length()=plus_exprt_with_overflow_check(s1.length(), s2.length()); axioms.push_back(s1.axiom_for_is_shorter_than(res)); axioms.push_back(s2.axiom_for_is_shorter_than(res)); @@ -73,8 +71,8 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( const function_application_exprt::argumentst &args=f.arguments(); assert(args.size()==2); - string_exprt s1=add_axioms_for_string_expr(args[0]); - string_exprt s2=add_axioms_for_string_expr(args[1]); + string_exprt s1=get_string_expr(args[0]); + string_exprt s2=get_string_expr(args[1]); return add_axioms_for_concat(s1, s2); } @@ -95,7 +93,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_int( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_from_int( args(f, 2)[1], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); @@ -118,7 +116,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_long( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_LONG_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); } @@ -138,7 +136,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_bool string_exprt string_constraint_generatort::add_axioms_for_concat_bool( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_bool(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); @@ -159,7 +157,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_char string_exprt string_constraint_generatort::add_axioms_for_concat_char( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_char(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); @@ -180,9 +178,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_double string_exprt string_constraint_generatort::add_axioms_for_concat_double( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_float( - args(f, 2)[1], MAX_DOUBLE_LENGTH); + string_exprt s1=get_string_expr(args(f, 2)[0]); + assert(refined_string_typet::is_refined_string_type(f.type())); + refined_string_typet ref_type=to_refined_string_type(f.type()); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], ref_type, true); return add_axioms_for_concat(s1, s2); } @@ -201,8 +200,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_float string_exprt string_constraint_generatort::add_axioms_for_concat_float( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_float(args(f, 2)[1], MAX_FLOAT_LENGTH); + string_exprt s1=get_string_expr(args(f, 2)[0]); + assert(refined_string_typet::is_refined_string_type(f.type())); + refined_string_typet ref_type=to_refined_string_type(f.type()); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], ref_type, false); return add_axioms_for_concat(s1, s2); } @@ -222,7 +223,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_code_point string_exprt string_constraint_generatort::add_axioms_for_concat_code_point( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_for_code_point(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index 64ffabfd266..2905c6a5f21 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -93,9 +93,13 @@ string_exprt string_constraint_generatort::add_axioms_for_empty_string( const function_application_exprt &f) { assert(f.arguments().empty()); + assert(refined_string_typet::is_refined_string_type(f.type())); const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt res=fresh_string(ref_type); - axioms.push_back(res.axiom_for_has_length(0)); + exprt size=from_integer(0, ref_type.get_index_type()); + const array_typet &content_type=ref_type.get_content_type(); + array_of_exprt empty_array( + from_integer(0, ref_type.get_content_type().subtype()), content_type); + string_exprt res(size, empty_array, ref_type); return res; } @@ -132,6 +136,7 @@ string_exprt string_constraint_generatort::add_axioms_from_literal( else { // Java string constant + assert(false); // TODO: Check if used. On the contrary, discard else. assert(arg.id()==ID_symbol); const exprt &s=arg.op0(); diff --git a/src/solvers/refinement/string_constraint_generator_indexof.cpp b/src/solvers/refinement/string_constraint_generator_indexof.cpp index 283de683bd6..d799091a573 100644 --- a/src/solvers/refinement/string_constraint_generator_indexof.cpp +++ b/src/solvers/refinement/string_constraint_generator_indexof.cpp @@ -92,15 +92,16 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( symbol_exprt contains=fresh_boolean("contains_substring"); // We add axioms: - // a1 : contains => |substring|>=offset>=from_index + // a1 : contains => |str|-|substring|>=offset>=from_index // a2 : !contains => offset=-1 - // a3 : forall 0 <= witness str[witness+offset]=substring[witness] + // a3 : forall 0<=witness<|substring|. + // contains => str[witness+offset]=substring[witness] implies_exprt a1( contains, and_exprt( - str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( + substring.length(), offset)), binary_relation_exprt(offset, ID_ge, from_index))); axioms.push_back(a1); @@ -138,7 +139,8 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( implies_exprt a1( contains, and_exprt( - str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + str.axiom_for_is_longer_than( + plus_exprt_with_overflow_check(substring.length(), offset)), binary_relation_exprt(offset, ID_le, from_index))); axioms.push_back(a1); @@ -173,7 +175,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); const exprt &c=args[1]; const refined_string_typet &ref_type=to_refined_string_type(str.type()); assert(f.type()==ref_type.get_index_type()); @@ -186,14 +188,15 @@ exprt string_constraint_generatort::add_axioms_for_index_of( else assert(false); - if(c.type().id()==ID_unsignedbv) + if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { return add_axioms_for_index_of( str, typecast_exprt(c, ref_type.get_char_type()), from_index); } else { - string_exprt sub=add_axioms_for_string_expr(c); + assert(refined_string_typet::is_refined_string_type(c.type())); + string_exprt sub=get_string_expr(c); return add_axioms_for_index_of_string(str, sub, from_index); } } @@ -215,7 +218,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( exprt index1=from_integer(1, index_type); exprt minus1=from_integer(-1, index_type); - exprt from_index_plus_one=plus_exprt(from_index, index1); + exprt from_index_plus_one=plus_exprt_with_overflow_check(from_index, index1); and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), binary_relation_exprt(index, ID_lt, from_index_plus_one)); @@ -269,7 +272,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); exprt c=args[1]; const refined_string_typet &ref_type=to_refined_string_type(str.type()); exprt from_index; @@ -282,14 +285,14 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( else assert(false); - if(c.type().id()==ID_unsignedbv) + if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { return add_axioms_for_last_index_of( str, typecast_exprt(c, ref_type.get_char_type()), from_index); } else { - string_exprt sub=add_axioms_for_string_expr(c); + string_exprt sub=get_string_expr(c); return add_axioms_for_last_index_of_string(str, sub, from_index); } } diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp index 6a080a5dc6c..cd5f4376a93 100644 --- a/src/solvers/refinement/string_constraint_generator_insert.cpp +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -48,8 +48,8 @@ Function: string_constraint_generatort::add_axioms_for_insert string_exprt string_constraint_generatort::add_axioms_for_insert( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + string_exprt s2=get_string_expr(args(f, 3)[2]); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -70,7 +70,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_int( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_int( args(f, 3)[2], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); @@ -93,7 +93,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_long( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_LONG_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -115,7 +115,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_bool( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_bool(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -136,7 +136,7 @@ Function: string_constraint_generatort::add_axioms_for_insert_char string_exprt string_constraint_generatort::add_axioms_for_insert_char( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_char(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); @@ -158,8 +158,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_double string_exprt string_constraint_generatort::add_axioms_for_insert_double( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_float(args(f, 3)[2], ref_type, true); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -179,8 +180,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_float string_exprt string_constraint_generatort::add_axioms_for_insert_float( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_float(args(f, 3)[2], ref_type, false); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -216,7 +218,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_char_array( offset=from_integer(0, count.type()); } - string_exprt str=add_axioms_for_string_expr(f.arguments()[0]); + string_exprt str=get_string_expr(f.arguments()[0]); const exprt &length=f.arguments()[2]; const exprt &data=f.arguments()[3]; string_exprt arr=add_axioms_from_char_array( diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 7fe510c2e35..f8dedaab9a7 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -16,6 +16,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include #include +#include unsigned string_constraint_generatort::next_symbol_id=1; @@ -122,6 +123,42 @@ symbol_exprt string_constraint_generatort::fresh_boolean( /*******************************************************************\ +Function: string_constraint_generatort::plus_exprt_with_overflow_check + + Inputs: + op1 - First term of the sum + op2 - Second term of the sum + + Outputs: A plus expression representing the sum of the arguments + + Purpose: Create a plus expression while adding extra constraints to + axioms in order to prevent overflows. + +\*******************************************************************/ +plus_exprt string_constraint_generatort::plus_exprt_with_overflow_check( + const exprt &op1, const exprt &op2) +{ + plus_exprt sum(plus_exprt(op1, op2)); + + exprt zero=from_integer(0, op1.type()); + + binary_relation_exprt neg1(op1, ID_lt, zero); + binary_relation_exprt neg2(op2, ID_lt, zero); + binary_relation_exprt neg_sum(sum, ID_lt, zero); + + // We prevent overflows by adding the following constraint: + // If the signs of the two operands are the same, then the sign of the sum + // should also be the same. + implies_exprt no_overflow(equal_exprt(neg1, neg2), + equal_exprt(neg1, neg_sum)); + + axioms.push_back(no_overflow); + + return sum; +} + +/*******************************************************************\ + Function: string_constraint_generatort::fresh_string Inputs: a type for string @@ -144,59 +181,113 @@ string_exprt string_constraint_generatort::fresh_string( /*******************************************************************\ -Function: string_constraint_generatort::add_axioms_for_string_expr +Function: string_constraint_generatort::get_string_expr + + Inputs: an expression of refined string type + + Outputs: a string expression + + Purpose: casts an expression to a string expression, or fetches the + actual string_exprt in the case of a symbol. + +\*******************************************************************/ + +string_exprt string_constraint_generatort::get_string_expr(const exprt &expr) +{ + assert(refined_string_typet::is_refined_string_type(expr.type())); + + if(expr.id()==ID_symbol) + { + return find_or_add_string_of_symbol( + to_symbol_expr(expr), + to_refined_string_type(expr.type())); + } + else + { + return to_string_expr(expr); + } +} + +string_exprt string_constraint_generatort::convert_java_string_to_string_exprt( + const exprt &jls) +{ + assert(get_mode()==ID_java); + assert(jls.id()==ID_struct); + + exprt length(to_struct_expr(jls).op1()); + // TODO: Add assertion on the type. + // assert(length.type()==refined_string_typet::index_type()); + exprt java_content(to_struct_expr(jls).op2()); + if(java_content.id()==ID_address_of) + { + java_content=to_address_of_expr(java_content).object(); + } + else + { + java_content=dereference_exprt(java_content, java_content.type()); + } + + refined_string_typet type(java_int_type(), java_char_type()); + + return string_exprt(length, java_content, type); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_refined_string - Inputs: an expression of type string + Inputs: an expression of refined string type Outputs: a string expression that is linked to the argument through axioms that are added to the list - Purpose: obtain a refined string expression corresponding to string - variable of string function call + Purpose: obtain a refined string expression corresponding to a expression + of type string \*******************************************************************/ -string_exprt string_constraint_generatort::add_axioms_for_string_expr( - const exprt &unrefined_string) + +string_exprt string_constraint_generatort::add_axioms_for_refined_string( + const exprt &string) { - string_exprt s; + assert(refined_string_typet::is_refined_string_type(string.type())); + refined_string_typet type=to_refined_string_type(string.type()); - if(unrefined_string.id()==ID_function_application) + // Function applications should have been removed before + assert(string.id()!=ID_function_application); + + if(string.id()==ID_symbol) { - exprt res=add_axioms_for_function_application( - to_function_application_expr(unrefined_string)); - s=to_string_expr(res); + const symbol_exprt &sym=to_symbol_expr(string); + string_exprt s=find_or_add_string_of_symbol(sym, type); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } - else if(unrefined_string.id()==ID_symbol) - s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string)); - else if(unrefined_string.id()==ID_address_of) + else if(string.id()==ID_nondet_symbol) { - assert(unrefined_string.op0().id()==ID_symbol); - s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string.op0())); + string_exprt s=fresh_string(type); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } - else if(unrefined_string.id()==ID_if) - s=add_axioms_for_if(to_if_expr(unrefined_string)); - else if(unrefined_string.id()==ID_nondet_symbol || - unrefined_string.id()==ID_struct) + else if(string.id()==ID_if) { - // TODO: for now we ignore non deterministic symbols and struct + return add_axioms_for_if(to_if_expr(string)); } - else if(unrefined_string.id()==ID_typecast) + else if(string.id()==ID_struct) { - exprt arg=to_typecast_expr(unrefined_string).op(); - exprt res=add_axioms_for_string_expr(arg); - s=to_string_expr(res); + const string_exprt &s=to_string_expr(string); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } else { - throw "add_axioms_for_string_expr:\n"+unrefined_string.pretty()+ + throw "add_axioms_for_refined_string:\n"+string.pretty()+ "\nwhich is not a function application, "+ - "a symbol or an if expression"; + "a symbol, a struct or an if expression"; } - - axioms.push_back( - s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); - return s; } /*******************************************************************\ @@ -216,10 +307,10 @@ string_exprt string_constraint_generatort::add_axioms_for_if( { assert( refined_string_typet::is_refined_string_type(expr.true_case().type())); - string_exprt t=add_axioms_for_string_expr(expr.true_case()); + string_exprt t=get_string_expr(expr.true_case()); assert( refined_string_typet::is_refined_string_type(expr.false_case().type())); - string_exprt f=add_axioms_for_string_expr(expr.false_case()); + string_exprt f=get_string_expr(expr.false_case()); const refined_string_typet &ref_type=to_refined_string_type(t.type()); const typet &index_type=ref_type.get_index_type(); string_exprt res=fresh_string(ref_type); @@ -246,7 +337,7 @@ Function: string_constraint_generatort::find_or_add_string_of_symbol Outputs: a string expression - Purpose: if a symbol represent a string is present in the symbol_to_string + Purpose: if a symbol representing a string is present in the symbol_to_string table, returns the corresponding string, if the symbol is not yet present, creates a new string with the correct type depending on whether the mode is java or c, adds it to the table and returns it. @@ -254,12 +345,11 @@ Function: string_constraint_generatort::find_or_add_string_of_symbol \*******************************************************************/ string_exprt string_constraint_generatort::find_or_add_string_of_symbol( - const symbol_exprt &sym) + const symbol_exprt &sym, const refined_string_typet &ref_type) { irep_idt id=sym.get_identifier(); - const refined_string_typet &ref_type=to_refined_string_type(sym.type()); string_exprt str=fresh_string(ref_type); - auto entry=symbol_to_string.insert(std::make_pair(id, str)); + auto entry=unresolved_symbols.insert(std::make_pair(id, str)); return entry.first->second; } @@ -286,125 +376,180 @@ exprt string_constraint_generatort::add_axioms_for_function_application( const irep_idt &id=is_ssa_expr(name)?to_ssa_expr(name).get_object_name(): to_symbol_expr(name).get_identifier(); + std::string str_id(id.c_str()); + + size_t pos=str_id.find("func_length"); + if(pos!=std::string::npos) + { + function_application_exprt new_expr(expr); + // TODO: This part needs some improvement. + // Stripping the symbol name is not a very robust process. + new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); + assert(get_mode()==ID_java); + new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); + + auto res_it=function_application_cache.insert(std::make_pair(new_expr, + nil_exprt())); + if(res_it.second) + { + string_exprt res=to_string_expr( + add_axioms_for_function_application(new_expr)); + res_it.first->second=res; + return res.length(); + } + else + return to_string_expr(res_it.first->second).length(); + } + + pos = str_id.find("func_data"); + if(pos!=std::string::npos) + { + function_application_exprt new_expr(expr); + new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); + new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); + + auto res_it=function_application_cache.insert(std::make_pair(new_expr, + nil_exprt())); + if(res_it.second) + { + string_exprt res=to_string_expr( + add_axioms_for_function_application(new_expr)); + res_it.first->second=res; + return res.content(); + } + else + return to_string_expr(res_it.first->second).content(); + } + // TODO: improve efficiency of this test by either ordering test by frequency // or using a map + auto res_it=function_application_cache.find(expr); + if(res_it!=function_application_cache.end() && res_it->second!=nil_exprt()) + return res_it->second; + + exprt res; + if(id==ID_cprover_char_literal_func) - return add_axioms_for_char_literal(expr); + res=add_axioms_for_char_literal(expr); else if(id==ID_cprover_string_length_func) - return add_axioms_for_length(expr); + res=add_axioms_for_length(expr); else if(id==ID_cprover_string_equal_func) - return add_axioms_for_equals(expr); + res=add_axioms_for_equals(expr); else if(id==ID_cprover_string_equals_ignore_case_func) - return add_axioms_for_equals_ignore_case(expr); + res=add_axioms_for_equals_ignore_case(expr); else if(id==ID_cprover_string_is_empty_func) - return add_axioms_for_is_empty(expr); + res=add_axioms_for_is_empty(expr); else if(id==ID_cprover_string_char_at_func) - return add_axioms_for_char_at(expr); + res=add_axioms_for_char_at(expr); else if(id==ID_cprover_string_is_prefix_func) - return add_axioms_for_is_prefix(expr); + res=add_axioms_for_is_prefix(expr); else if(id==ID_cprover_string_is_suffix_func) - return add_axioms_for_is_suffix(expr); + res=add_axioms_for_is_suffix(expr); else if(id==ID_cprover_string_startswith_func) - return add_axioms_for_is_prefix(expr, true); + res=add_axioms_for_is_prefix(expr, true); else if(id==ID_cprover_string_endswith_func) - return add_axioms_for_is_suffix(expr, true); + res=add_axioms_for_is_suffix(expr, true); else if(id==ID_cprover_string_contains_func) - return add_axioms_for_contains(expr); + res=add_axioms_for_contains(expr); else if(id==ID_cprover_string_hash_code_func) - return add_axioms_for_hash_code(expr); + res=add_axioms_for_hash_code(expr); else if(id==ID_cprover_string_index_of_func) - return add_axioms_for_index_of(expr); + res=add_axioms_for_index_of(expr); else if(id==ID_cprover_string_last_index_of_func) - return add_axioms_for_last_index_of(expr); + res=add_axioms_for_last_index_of(expr); else if(id==ID_cprover_string_parse_int_func) - return add_axioms_for_parse_int(expr); + res=add_axioms_for_parse_int(expr); else if(id==ID_cprover_string_to_char_array_func) - return add_axioms_for_to_char_array(expr); + res=add_axioms_for_to_char_array(expr); else if(id==ID_cprover_string_code_point_at_func) - return add_axioms_for_code_point_at(expr); + res=add_axioms_for_code_point_at(expr); else if(id==ID_cprover_string_code_point_before_func) - return add_axioms_for_code_point_before(expr); + res=add_axioms_for_code_point_before(expr); else if(id==ID_cprover_string_code_point_count_func) - return add_axioms_for_code_point_count(expr); + res=add_axioms_for_code_point_count(expr); else if(id==ID_cprover_string_offset_by_code_point_func) - return add_axioms_for_offset_by_code_point(expr); + res=add_axioms_for_offset_by_code_point(expr); else if(id==ID_cprover_string_compare_to_func) - return add_axioms_for_compare_to(expr); + res=add_axioms_for_compare_to(expr); else if(id==ID_cprover_string_literal_func) - return add_axioms_from_literal(expr); + res=add_axioms_from_literal(expr); else if(id==ID_cprover_string_concat_func) - return add_axioms_for_concat(expr); + res=add_axioms_for_concat(expr); else if(id==ID_cprover_string_concat_int_func) - return add_axioms_for_concat_int(expr); + res=add_axioms_for_concat_int(expr); else if(id==ID_cprover_string_concat_long_func) - return add_axioms_for_concat_long(expr); + res=add_axioms_for_concat_long(expr); else if(id==ID_cprover_string_concat_bool_func) - return add_axioms_for_concat_bool(expr); + res=add_axioms_for_concat_bool(expr); else if(id==ID_cprover_string_concat_char_func) - return add_axioms_for_concat_char(expr); + res=add_axioms_for_concat_char(expr); else if(id==ID_cprover_string_concat_double_func) - return add_axioms_for_concat_double(expr); + res=add_axioms_for_concat_double(expr); else if(id==ID_cprover_string_concat_float_func) - return add_axioms_for_concat_float(expr); + res=add_axioms_for_concat_float(expr); else if(id==ID_cprover_string_concat_code_point_func) - return add_axioms_for_concat_code_point(expr); + res=add_axioms_for_concat_code_point(expr); else if(id==ID_cprover_string_insert_func) - return add_axioms_for_insert(expr); + res=add_axioms_for_insert(expr); else if(id==ID_cprover_string_insert_int_func) - return add_axioms_for_insert_int(expr); + res=add_axioms_for_insert_int(expr); else if(id==ID_cprover_string_insert_long_func) - return add_axioms_for_insert_long(expr); + res=add_axioms_for_insert_long(expr); else if(id==ID_cprover_string_insert_bool_func) - return add_axioms_for_insert_bool(expr); + res=add_axioms_for_insert_bool(expr); else if(id==ID_cprover_string_insert_char_func) - return add_axioms_for_insert_char(expr); + res=add_axioms_for_insert_char(expr); else if(id==ID_cprover_string_insert_double_func) - return add_axioms_for_insert_double(expr); + res=add_axioms_for_insert_double(expr); else if(id==ID_cprover_string_insert_float_func) - return add_axioms_for_insert_float(expr); + res=add_axioms_for_insert_float(expr); +#if 0 else if(id==ID_cprover_string_insert_char_array_func) - return add_axioms_for_insert_char_array(expr); + res=add_axioms_for_insert_char_array(expr); +#endif else if(id==ID_cprover_string_substring_func) - return add_axioms_for_substring(expr); + res=add_axioms_for_substring(expr); else if(id==ID_cprover_string_trim_func) - return add_axioms_for_trim(expr); + res=add_axioms_for_trim(expr); else if(id==ID_cprover_string_to_lower_case_func) - return add_axioms_for_to_lower_case(expr); + res=add_axioms_for_to_lower_case(expr); else if(id==ID_cprover_string_to_upper_case_func) - return add_axioms_for_to_upper_case(expr); + res=add_axioms_for_to_upper_case(expr); else if(id==ID_cprover_string_char_set_func) - return add_axioms_for_char_set(expr); + res=add_axioms_for_char_set(expr); else if(id==ID_cprover_string_value_of_func) - return add_axioms_for_value_of(expr); + res=add_axioms_for_value_of(expr); else if(id==ID_cprover_string_empty_string_func) - return add_axioms_for_empty_string(expr); + res=add_axioms_for_empty_string(expr); else if(id==ID_cprover_string_copy_func) - return add_axioms_for_copy(expr); + res=add_axioms_for_copy(expr); else if(id==ID_cprover_string_of_int_func) - return add_axioms_from_int(expr); + res=add_axioms_from_int(expr); else if(id==ID_cprover_string_of_int_hex_func) - return add_axioms_from_int_hex(expr); + res=add_axioms_from_int_hex(expr); else if(id==ID_cprover_string_of_float_func) - return add_axioms_from_float(expr); + res=add_axioms_from_float(expr); else if(id==ID_cprover_string_of_double_func) - return add_axioms_from_double(expr); + res=add_axioms_from_double(expr); else if(id==ID_cprover_string_of_long_func) - return add_axioms_from_long(expr); + res=add_axioms_from_long(expr); else if(id==ID_cprover_string_of_bool_func) - return add_axioms_from_bool(expr); + res=add_axioms_from_bool(expr); else if(id==ID_cprover_string_of_char_func) - return add_axioms_from_char(expr); - else if(id==ID_cprover_string_of_char_array_func) - return add_axioms_from_char_array(expr); + res=add_axioms_from_char(expr); else if(id==ID_cprover_string_set_length_func) - return add_axioms_for_set_length(expr); + res=add_axioms_for_set_length(expr); else if(id==ID_cprover_string_delete_func) - return add_axioms_for_delete(expr); + res=add_axioms_for_delete(expr); else if(id==ID_cprover_string_delete_char_at_func) - return add_axioms_for_delete_char_at(expr); + res=add_axioms_for_delete_char_at(expr); else if(id==ID_cprover_string_replace_func) - return add_axioms_for_replace(expr); + res=add_axioms_for_replace(expr); + else if(id==ID_cprover_string_intern_func) + res=add_axioms_for_intern(expr); + else if(id==ID_cprover_string_array_of_char_pointer_func) + res=add_axioms_for_char_pointer(expr); else { std::string msg( @@ -412,13 +557,16 @@ exprt string_constraint_generatort::add_axioms_for_function_application( msg+=id2string(id); throw msg; } + function_application_cache[expr]=res; + return res; } /*******************************************************************\ Function: string_constraint_generatort::add_axioms_for_copy - Inputs: function application with one argument, which is a string + Inputs: function application with one argument, which is a string, + or three arguments: string, integer offset and count Outputs: a new string expression @@ -430,20 +578,20 @@ Function: string_constraint_generatort::add_axioms_for_copy string_exprt string_constraint_generatort::add_axioms_for_copy( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 1)[0]); - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt res=fresh_string(ref_type); - - // We add axioms: - // a1 : |res|=|s1| - // a2 : forall i<|s1|. s1[i]=res[i] - - axioms.push_back(res.axiom_for_has_same_length_as(s1)); - - symbol_exprt idx=fresh_univ_index("QA_index_copy", ref_type.get_index_type()); - string_constraintt a2(idx, s1.length(), equal_exprt(s1[idx], res[idx])); - axioms.push_back(a2); - return res; + const auto &args=f.arguments(); + if(args.size()==1) + { + string_exprt s1=get_string_expr(args[0]); + return s1; + } + else + { + assert(args.size()==3); + string_exprt s1=get_string_expr(args[0]); + exprt offset=args[1]; + exprt count=args[2]; + return add_axioms_for_substring(s1, offset, plus_exprt(offset, count)); + } } /*******************************************************************\ @@ -473,6 +621,28 @@ string_exprt string_constraint_generatort::add_axioms_for_java_char_array( /*******************************************************************\ +Function: string_constraint_generatort::add_axioms_for_char_pointer + + Inputs: an expression of type char + + Outputs: an array expression + + Purpose: for an expression of the form `array[0]` returns `array` + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_char_pointer( + const function_application_exprt &fun) +{ + exprt char_pointer=args(fun, 1)[0]; + if(char_pointer.id()==ID_index) + return char_pointer.op0(); + // TODO: we do not know what to do in the other cases + assert(false); +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_length Inputs: function application with one string argument @@ -486,7 +656,7 @@ Function: string_constraint_generatort::add_axioms_for_length exprt string_constraint_generatort::add_axioms_for_length( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); return str.length(); } @@ -511,6 +681,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( const exprt &offset, const exprt &count) { + assert(false); // deprecated, we should use add_axioms_for_substring instead const typet &char_type=to_array_type(data.type()).subtype(); const typet &index_type=length.type(); refined_string_typet ref_type(index_type, char_type); @@ -523,7 +694,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array", index_type); exprt char_in_tab=data; assert(char_in_tab.id()==ID_index); - char_in_tab.op1()=plus_exprt(qvar, offset); + char_in_tab.op1()=plus_exprt_with_overflow_check(qvar, offset); string_constraintt a1(qvar, count, equal_exprt(str[qvar], char_in_tab)); axioms.push_back(a1); @@ -549,6 +720,7 @@ Function: string_constraint_generatort::add_axioms_from_char_array string_exprt string_constraint_generatort::add_axioms_from_char_array( const function_application_exprt &f) { + assert(false); // deprecated, we should use add_axioms_for_substring instead exprt offset; exprt count; if(f.arguments().size()==4) @@ -638,7 +810,7 @@ Function: string_constraint_generatort::add_axioms_for_char_at exprt string_constraint_generatort::add_axioms_for_char_at( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); symbol_exprt char_sym=fresh_symbol("char", ref_type.get_char_type()); axioms.push_back(equal_exprt(char_sym, str[args(f, 2)[1]])); @@ -660,26 +832,6 @@ Function: string_constraint_generatort::add_axioms_for_to_char_array exprt string_constraint_generatort::add_axioms_for_to_char_array( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); return str.content(); } - -/*******************************************************************\ - -Function: string_constraint_generatort::set_string_symbol_equal_to_expr - - Inputs: a symbol and a string - - Purpose: add a correspondence to make sure the symbol points to the - same string as the second argument - -\*******************************************************************/ - -void string_constraint_generatort::set_string_symbol_equal_to_expr( - const symbol_exprt &sym, const exprt &str) -{ - if(str.id()==ID_symbol) - assign_to_symbol(sym, find_or_add_string_of_symbol(to_symbol_expr(str))); - else - assign_to_symbol(sym, add_axioms_for_string_expr(str)); -} diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index 8b1c7bca9d4..2a08b021070 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -31,15 +31,15 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( // We add axioms: // a1 : isprefix => |str| >= |prefix|+offset - // a2 : forall 0<=qvar - // s0[witness+offset]=s2[witness] - // a3 : !isprefix => |str| < |prefix|+offset - // || (|str| >= |prefix|+offset &&0<=witness<|prefix| - // &&str[witness+ofsset]!=prefix[witness]) + // a2 : forall 0<=qvar<|prefix|. isprefix => s0[witness+offset]=s2[witness] + // a3 : !isprefix => + // |str|<|prefix|+offset || + // (0<=witness<|prefix| && str[witness+offset]!=prefix[witness]) implies_exprt a1( isprefix, - str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))); + str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( + prefix.length(), offset))); axioms.push_back(a1); symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); @@ -47,7 +47,8 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( qvar, prefix.length(), isprefix, - equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); + equal_exprt(str[plus_exprt_with_overflow_check(qvar, offset)], + prefix[qvar])); axioms.push_back(a2); symbol_exprt witness=fresh_exist_index("witness_not_isprefix", index_type); @@ -55,14 +56,13 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( axiom_for_is_positive_index(witness), and_exprt( prefix.axiom_for_is_strictly_longer_than(witness), - notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness]))); + notequal_exprt(str[plus_exprt_with_overflow_check(witness, offset)], + prefix[witness]))); or_exprt s0_notpref_s1( not_exprt( - str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))), - and_exprt( - witness_diff, str.axiom_for_is_longer_than( - plus_exprt(prefix.length(), offset)))); + plus_exprt_with_overflow_check(prefix.length(), offset))), + witness_diff); implies_exprt a3(not_exprt(isprefix), s0_notpref_s1); axioms.push_back(a3); @@ -88,8 +88,8 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( { const function_application_exprt::argumentst &args=f.arguments(); assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); - string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); - string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + string_exprt s0=get_string_expr(args[swap_arguments?1:0]); + string_exprt s1=get_string_expr(args[swap_arguments?0:1]); exprt offset; if(args.size()==2) offset=from_integer(0, s0.length().type()); @@ -121,7 +121,7 @@ exprt string_constraint_generatort::add_axioms_for_is_empty( // a2 : s0 => is_empty symbol_exprt is_empty=fresh_boolean("is_empty"); - string_exprt s0=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt s0=get_string_expr(args(f, 1)[0]); axioms.push_back(implies_exprt(is_empty, s0.axiom_for_has_length(0))); axioms.push_back(implies_exprt(s0.axiom_for_has_length(0), is_empty)); return typecast_exprt(is_empty, f.type()); @@ -150,8 +150,8 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( symbol_exprt issuffix=fresh_boolean("issuffix"); typecast_exprt tc_issuffix(issuffix, f.type()); - string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); - string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + string_exprt s0=get_string_expr(args[swap_arguments?1:0]); + string_exprt s1=get_string_expr(args[swap_arguments?0:1]); const typet &index_type=s0.length().type(); // We add axioms: @@ -159,7 +159,7 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( // a2 : forall witness s1[witness]=s0[witness + s0.length-s1.length] // a3 : !issuffix => - // s1.length > s0.length + // (s1.length > s0.length && witness=-1) // || (s1.length > witness>=0 // &&s1[witness]!=s0[witness + s0.length-s1.length] @@ -177,7 +177,8 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( exprt shifted=plus_exprt( witness, minus_exprt(s1.length(), s0.length())); or_exprt constr3( - s0.axiom_for_is_strictly_longer_than(s1), + and_exprt(s0.axiom_for_is_strictly_longer_than(s1), + equal_exprt(witness, from_integer(-1, index_type))), and_exprt( notequal_exprt(s0[witness], s1[shifted]), and_exprt( @@ -207,9 +208,10 @@ exprt string_constraint_generatort::add_axioms_for_contains( assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); symbol_exprt contains=fresh_boolean("contains"); typecast_exprt tc_contains(contains, f.type()); - string_exprt s0=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[1]); - const typet &index_type=s0.type(); + string_exprt s0=get_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[1]); + const refined_string_typet ref_type=to_refined_string_type(s0.type()); + const typet &index_type=ref_type.get_index_type(); // We add axioms: // a1 : contains => s0.length >= s1.length diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index 83735e0d5b5..f27fe5d4317 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -29,7 +29,7 @@ Function: string_constraint_generatort::add_axioms_for_set_length string_exprt string_constraint_generatort::add_axioms_for_set_length( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); exprt k=args(f, 2)[1]; const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt res=fresh_string(ref_type); @@ -76,7 +76,7 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( { const function_application_exprt::argumentst &args=f.arguments(); assert(args.size()>=2); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); exprt i(args[1]); exprt j; if(args.size()==3) @@ -110,7 +110,6 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( { const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &index_type=ref_type.get_index_type(); - symbol_exprt idx=fresh_exist_index("index_substring", index_type); assert(start.type()==index_type); assert(end.type()==index_type); string_exprt res=fresh_string(ref_type); @@ -133,8 +132,11 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( // Warning: check what to do if the string is not long enough axioms.push_back(str.axiom_for_is_longer_than(end)); - string_constraintt a4( - idx, res.length(), equal_exprt(res[idx], str[plus_exprt(start, idx)])); + symbol_exprt idx=fresh_univ_index("QA_index_substring", index_type); + string_constraintt a4(idx, + res.length(), + equal_exprt(res[idx], + str[plus_exprt_with_overflow_check(start, idx)])); axioms.push_back(a4); return res; } @@ -154,7 +156,7 @@ Function: string_constraint_generatort::add_axioms_for_trim string_exprt string_constraint_generatort::add_axioms_for_trim( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &index_type=ref_type.get_index_type(); string_exprt res=fresh_string(ref_type); @@ -173,7 +175,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( // a8 : forall n<|s1|, s[idx+n]=s1[n] // a9 : (s[m]>' ' &&s[m+|s1|-1]>' ') || m=|s| - exprt a1=str.axiom_for_is_longer_than(plus_exprt(idx, res.length())); + exprt a1=str.axiom_for_is_longer_than( + plus_exprt_with_overflow_check(idx, res.length())); axioms.push_back(a1); binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type)); @@ -195,7 +198,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( axioms.push_back(a6); symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type); - minus_exprt bound(str.length(), plus_exprt(idx, res.length())); + minus_exprt bound(str.length(), plus_exprt_with_overflow_check(idx, + res.length())); binary_relation_exprt eqn2( str[plus_exprt(idx, plus_exprt(res.length(), n2))], ID_le, @@ -210,7 +214,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( axioms.push_back(a8); minus_exprt index_before( - plus_exprt(idx, res.length()), from_integer(1, index_type)); + plus_exprt_with_overflow_check(idx, res.length()), + from_integer(1, index_type)); binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char); or_exprt a9( equal_exprt(idx, str.length()), @@ -236,7 +241,7 @@ Function: string_constraint_generatort::add_axioms_for_to_lower_case string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -289,7 +294,7 @@ Function: string_constraint_generatort::add_axioms_for_to_upper_case string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -345,7 +350,7 @@ Function: string_constraint_generatort::add_axioms_for_char_set string_exprt string_constraint_generatort::add_axioms_for_char_set( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); string_exprt res=fresh_string(ref_type); with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]); @@ -378,7 +383,7 @@ Function: string_constraint_generatort::add_axioms_for_replace string_exprt string_constraint_generatort::add_axioms_for_replace( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const exprt &old_char=args(f, 3)[1]; const exprt &new_char=args(f, 3)[2]; @@ -421,10 +426,12 @@ Function: string_constraint_generatort::add_axioms_for_delete_char_at string_exprt string_constraint_generatort::add_axioms_for_delete_char_at( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); exprt index_one=from_integer(1, str.length().type()); return add_axioms_for_delete( - str, args(f, 2)[1], plus_exprt(args(f, 2)[1], index_one)); + str, + args(f, 2)[1], + plus_exprt_with_overflow_check(args(f, 2)[1], index_one)); } /*******************************************************************\ @@ -468,6 +475,6 @@ Function: string_constraint_generatort::add_axioms_for_delete string_exprt string_constraint_generatort::add_axioms_for_delete( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); return add_axioms_for_delete(str, args(f, 3)[1], args(f, 3)[2]); } diff --git a/src/solvers/refinement/string_constraint_generator_valueof.cpp b/src/solvers/refinement/string_constraint_generator_valueof.cpp index d1030e5e085..10fe490259a 100644 --- a/src/solvers/refinement/string_constraint_generator_valueof.cpp +++ b/src/solvers/refinement/string_constraint_generator_valueof.cpp @@ -63,7 +63,8 @@ Function: string_constraint_generatort::add_axioms_from_float string_exprt string_constraint_generatort::add_axioms_from_float( const function_application_exprt &f) { - return add_axioms_from_float(args(f, 1)[0], false); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_float(args(f, 1)[0], ref_type, false); } /*******************************************************************\ @@ -81,7 +82,8 @@ Function: string_constraint_generatort::add_axioms_from_double string_exprt string_constraint_generatort::add_axioms_from_double( const function_application_exprt &f) { - return add_axioms_from_float(args(f, 1)[0], true); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_float(args(f, 1)[0], ref_type, true); } /*******************************************************************\ @@ -99,13 +101,12 @@ Function: string_constraint_generatort::add_axioms_from_float \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_float( - const exprt &f, bool double_precision) + const exprt &f, const refined_string_typet &ref_type, bool double_precision) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); + string_exprt res=fresh_string(ref_type); const typet &index_type=ref_type.get_index_type(); const typet &char_type=ref_type.get_char_type(); - string_exprt res=fresh_string(ref_type); - const exprt &index24=from_integer(24, ref_type.get_index_type()); + const exprt &index24=from_integer(24, index_type); axioms.push_back(res.axiom_for_is_shorter_than(index24)); string_exprt magnitude=fresh_string(ref_type); @@ -323,13 +324,22 @@ string_exprt string_constraint_generatort::add_axioms_from_int( axioms.push_back(a1); exprt chr=res[0]; - exprt starts_with_minus=equal_exprt(chr, minus_char); - exprt starts_with_digit=and_exprt( + equal_exprt starts_with_minus(chr, minus_char); + and_exprt starts_with_digit( binary_relation_exprt(chr, ID_ge, zero_char), binary_relation_exprt(chr, ID_le, nine_char)); or_exprt a2(starts_with_digit, starts_with_minus); axioms.push_back(a2); + // These are constraints to detect number that requiere the maximum number + // of digits + exprt smallest_with_max_digits= + from_integer(smallest_by_digit(max_size-1), type); + binary_relation_exprt big_negative( + i, ID_le, unary_minus_exprt(smallest_with_max_digits)); + binary_relation_exprt big_positive(i, ID_ge, smallest_with_max_digits); + or_exprt requieres_max_digits(big_negative, big_positive); + for(size_t size=1; size<=max_size; size++) { // For each possible size, we add axioms: @@ -387,13 +397,18 @@ string_exprt string_constraint_generatort::add_axioms_from_int( axioms.push_back(a6); } - // we have to be careful when exceeding the maximal size of integers + // when the size is close to the maximum, either the number is very big + // or it is negative + if(size==max_size-1) + { + implies_exprt a7(premise, or_exprt(requieres_max_digits, + starts_with_minus)); + axioms.push_back(a7); + } + // when we reach the maximal size the number is very big in the negative if(size==max_size) { - exprt smallest_with_10_digits=from_integer( - smallest_by_digit(max_size), type); - binary_relation_exprt big(i, ID_ge, smallest_with_10_digits); - implies_exprt a7(premise, big); + implies_exprt a7(premise, and_exprt(starts_with_minus, big_negative)); axioms.push_back(a7); } } @@ -597,6 +612,60 @@ string_exprt string_constraint_generatort::add_axioms_for_value_of( /*******************************************************************\ +Function: string_constraint_generatort::add_axioms_for_correct_number_format + + Inputs: function application with one string expression + + Outputs: an boolean expression + + Purpose: add axioms making the return value true if the given string is + a correct number + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_correct_number_format( + const string_exprt &str, std::size_t max_size) +{ + symbol_exprt correct=fresh_boolean("correct_number_format"); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &char_type=ref_type.get_char_type(); + const typet &index_type=ref_type.get_index_type(); + exprt zero_char=constant_char('0', char_type); + exprt nine_char=constant_char('9', char_type); + exprt minus_char=constant_char('-', char_type); + exprt plus_char=constant_char('+', char_type); + + exprt chr=str[0]; + equal_exprt starts_with_minus(chr, minus_char); + equal_exprt starts_with_plus(chr, plus_char); + and_exprt starts_with_digit( + binary_relation_exprt(chr, ID_ge, zero_char), + binary_relation_exprt(chr, ID_le, nine_char)); + + or_exprt correct_first( + or_exprt(starts_with_minus, starts_with_plus), starts_with_digit); + exprt has_first=str.axiom_for_is_longer_than(from_integer(1, index_type)); + implies_exprt a1(correct, and_exprt(has_first, correct_first)); + axioms.push_back(a1); + + exprt not_too_long=str.axiom_for_is_shorter_than(max_size); + axioms.push_back(not_too_long); + + symbol_exprt qvar=fresh_univ_index("number_format", index_type); + + and_exprt is_digit( + binary_relation_exprt(str[qvar], ID_ge, zero_char), + binary_relation_exprt(str[qvar], ID_le, nine_char)); + + string_constraintt a2( + qvar, from_integer(1, index_type), str.length(), correct, is_digit); + + axioms.push_back(a2); + return correct; +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_parse_int Inputs: function application with one string expression @@ -610,7 +679,7 @@ Function: string_constraint_generatort::add_axioms_for_parse_int exprt string_constraint_generatort::add_axioms_for_parse_int( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); const typet &type=f.type(); symbol_exprt i=fresh_symbol("parsed_int", type); const refined_string_typet &ref_type=to_refined_string_type(str.type()); @@ -626,6 +695,10 @@ exprt string_constraint_generatort::add_axioms_for_parse_int( exprt starts_with_plus=equal_exprt(chr, plus_char); exprt starts_with_digit=binary_relation_exprt(chr, ID_ge, zero_char); + // TODO: we should throw an exception when this does not hold: + exprt correct=add_axioms_for_correct_number_format(str); + axioms.push_back(correct); + for(unsigned size=1; size<=10; size++) { exprt sum=from_integer(0, type); diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index db9a2c460cc..1391ffd2315 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -11,17 +11,38 @@ Author: Alberto Griggio, alberto.griggio@gmail.com \*******************************************************************/ #include +#include #include #include #include +#include +#include #include #include #include +#include + +/*******************************************************************\ + +Constructor: string_refinementt + + Inputs: a namespace, a decision procedure, a bound on the number + of refinements and a boolean flag `concretize_result` + + Purpose: refinement_bound is a bound on the number of refinement allowed. + if `concretize_result` is set to true, at the end of the decision + procedure, the solver try to find a concrete value for each + character + +\*******************************************************************/ string_refinementt::string_refinementt( - const namespacet &_ns, propt &_prop, unsigned refinement_bound): + const namespacet &_ns, + propt &_prop, + unsigned refinement_bound): supert(_ns, _prop), use_counter_example(false), + do_concretizing(false), initial_loop_bound(refinement_bound) { } @@ -52,15 +73,27 @@ Function: string_refinementt::display_index_set void string_refinementt::display_index_set() { + std::size_t count=0; + std::size_t count_current=0; for(const auto &i : index_set) { const exprt &s=i.first; - debug() << "IS(" << from_expr(s) << ")=={"; + debug() << "IS(" << from_expr(s) << ")=={" << eom; for(auto j : i.second) - debug() << from_expr(j) << "; "; + { + if(current_index_set[i.first].find(j)!=current_index_set[i.first].end()) + { + count_current++; + debug() << "**"; + } + debug() << " " << from_expr(j) << ";" << eom; + count++; + } debug() << "}" << eom; } + debug() << count << " elements in index set (" << count_current + << " newly added)" << eom; } /*******************************************************************\ @@ -100,152 +133,320 @@ void string_refinementt::add_instantiations() /*******************************************************************\ -Function: string_refinementt::convert_rest - - Inputs: an expression +Function: string_refinementt::add_symbol_to_symbol_map() - Outputs: a literal + Inputs: a symbol and the expression to map it to - Purpose: if the expression is a function application, we convert it - using our own convert_function_application method + Purpose: keeps a map of symbols to expressions, such as none of the + mapped values exist as a key \*******************************************************************/ -literalt string_refinementt::convert_rest(const exprt &expr) +void string_refinementt::add_symbol_to_symbol_map +(const exprt &lhs, const exprt &rhs) { - if(expr.id()==ID_function_application) + assert(lhs.id()==ID_symbol); + + // We insert the mapped value of the rhs, if it exists. + auto it=symbol_resolve.find(rhs); + const exprt &new_rhs=it!=symbol_resolve.end()?it->second:rhs; + + symbol_resolve[lhs]=new_rhs; + reverse_symbol_resolve[new_rhs].push_back(lhs); + + std::list symbols_to_update_with_new_rhs(reverse_symbol_resolve[rhs]); + for(exprt item : symbols_to_update_with_new_rhs) { - // can occur in __CPROVER_assume - bvt bv=convert_function_application(to_function_application_expr(expr)); - assert(bv.size()==1); - return bv[0]; + symbol_resolve[item]=new_rhs; + reverse_symbol_resolve[new_rhs].push_back(item); } - else +} + +/*******************************************************************\ + +Function: string_refinementt::set_char_array_equality() + + Inputs: the rhs and lhs of an equality over character arrays + + Purpose: add axioms if the rhs is a character array + +\*******************************************************************/ + +void string_refinementt::set_char_array_equality( + const exprt &lhs, const exprt &rhs) +{ + assert(lhs.id()==ID_symbol); + + if(rhs.id()==ID_array && rhs.type().id()==ID_array) { - return supert::convert_rest(expr); + const typet &index_type=to_array_type(rhs.type()).size().type(); + for(size_t i=0, ilim=rhs.operands().size(); i!=ilim; ++i) + { + // Introduce axioms to map symbolic rhs to its char array. + index_exprt arraycell(rhs, from_integer(i, index_type)); + equal_exprt arrayeq(arraycell, rhs.operands()[i]); + add_lemma(arrayeq, false); +#if 0 + generator.axioms.push_back(arrayeq); +#endif + } } + // At least for Java (as it is currently pre-processed), we need not consider + // other cases, because all character arrays find themselves on the rhs of an + // equality. Note that this might not be the case for other languages. } /*******************************************************************\ -Function: string_refinementt::convert_symbol +Function: string_refinementt::substitute_function_applications() - Inputs: an expression + Inputs: an expression containing function applications - Outputs: a bitvector + Outputs: an epression containing no function application - Purpose: if the expression as string type, look up for the string in the - list of string symbols that we maintain, and convert it; - otherwise use the method of the parent class + Purpose: remove functions applications and create the necessary + axioms \*******************************************************************/ -bvt string_refinementt::convert_symbol(const exprt &expr) +exprt string_refinementt::substitute_function_applications(exprt expr) { - const typet &type=expr.type(); - const irep_idt &identifier=expr.get(ID_identifier); - assert(!identifier.empty()); + for(size_t i=0; iMAX_CONCRETE_STRING_SIZE? + MAX_CONCRETE_STRING_SIZE:concretize_limit; + exprt content_expr=str.content(); + replace_expr(current_model, content_expr); + for(size_t i=0; i &pair : non_string_axioms) + { + replace_expr(symbol_resolve, pair.first); + debug() << "super::set_to " << from_expr(pair.first) << eom; + supert::set_to(pair.first, pair.second); + } + + for(exprt &axiom : generator.axioms) + { + replace_expr(symbol_resolve, axiom); if(axiom.id()==ID_string_constraint) { string_constraintt c=to_string_constraint(axiom); @@ -282,6 +494,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() { add_lemma(axiom); } + } initial_index_set(universal_axioms); update_index_set(cur); @@ -302,6 +515,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() else { debug() << "check_SAT: the model is correct" << eom; + concretize_lengths(); return D_SATISFIABLE; } @@ -318,7 +532,13 @@ decision_proceduret::resultt string_refinementt::dec_solve() if(current_index_set.empty()) { debug() << "current index set is empty" << eom; - return D_SATISFIABLE; + if(do_concretizing) + { + concretize_results(); + do_concretizing=false; + } + else + return D_SATISFIABLE; } display_index_set(); @@ -367,131 +587,337 @@ bvt string_refinementt::convert_bool_bv(const exprt &boole, const exprt &orig) Function: string_refinementt::add_lemma - Inputs: a lemma + Inputs: a lemma and Boolean value stating whether the lemma should + be added to the index set. Purpose: add the given lemma to the solver \*******************************************************************/ -void string_refinementt::add_lemma(const exprt &lemma, bool add_to_index_set) +void string_refinementt::add_lemma( + const exprt &lemma, bool _simplify, bool add_to_index_set) { if(!seen_instances.insert(lemma).second) return; - if(lemma.is_true()) + if(add_to_index_set) + cur.push_back(lemma); + + exprt simple_lemma=lemma; + if(_simplify) + simplify(simple_lemma, ns); + + if(simple_lemma.is_true()) { +#if 0 debug() << "string_refinementt::add_lemma : tautology" << eom; +#endif return; } - debug() << "adding lemma " << from_expr(lemma) << eom; + debug() << "adding lemma " << from_expr(simple_lemma) << eom; - prop.l_set_to_true(convert(lemma)); - if(add_to_index_set) - cur.push_back(lemma); + prop.l_set_to_true(convert(simple_lemma)); } /*******************************************************************\ -Function: string_refinementt::string_of_array +Function: string_refinementt::get_array - Inputs: a constant array expression and a integer expression + Inputs: an expression representing an array and an expression + representing an integer - Outputs: a string + Outputs: an array expression or an array_of_exprt - Purpose: convert the content of a string to a more readable representation. - This should only be used for debbuging. + Purpose: get a model of an array and put it in a certain form. + If the size cannot be obtained or if it is too big, return an + empty array. \*******************************************************************/ -std::string string_refinementt::string_of_array( - const exprt &arr, const exprt &size) const +exprt string_refinementt::get_array(const exprt &arr, const exprt &size) const { - if(size.id()!=ID_constant) - return "string of unknown size"; + exprt arr_val=get_array(arr); + exprt size_val=supert::get(size); + size_val=simplify_expr(size_val, ns); + typet char_type=arr.type().subtype(); + typet index_type=size.type(); + array_typet empty_ret_type(char_type, from_integer(0, index_type)); + array_of_exprt empty_ret(from_integer(0, char_type), empty_ret_type); + + if(size_val.id()!=ID_constant) + { +#if 0 + debug() << "(sr::get_array) string of unknown size: " + << from_expr(size_val) << eom; +#endif + return empty_ret; + } + unsigned n; - if(to_unsigned_integer(to_constant_expr(size), n)) - n=0; + if(to_unsigned_integer(to_constant_expr(size_val), n)) + { +#if 0 + debug() << "(sr::get_array) size is not valid" << eom; +#endif + return empty_ret; + } + + array_typet ret_type(char_type, from_integer(n, index_type)); + array_exprt ret(ret_type); if(n>MAX_CONCRETE_STRING_SIZE) - return "very long string"; + { +#if 0 + debug() << "(sr::get_array) long string (size=" << n << ")" << eom; +#endif + return empty_ret; + } + if(n==0) - return "\"\""; + { +#if 0 + debug() << "(sr::get_array) empty string" << eom; +#endif + return empty_ret; + } - std::ostringstream buf; - buf << "\""; - exprt val=get(arr); + std::vector concrete_array(n); - if(val.id()=="array-list") + if(arr_val.id()=="array-list") { - for(size_t i=0; i(c); + exprt value=arr_val.operands()[i*2+1]; + to_unsigned_integer(to_constant_expr(value), concrete_array[idx]); } } } } + else if(arr_val.id()==ID_array) + { + for(size_t i=0; i=32) + result << (unsigned char) c; + else + { + result << "\\u" << std::hex << std::setw(4) << std::setfill('0') + << (unsigned int) c; + } + } + + return result.str(); +} + +/*******************************************************************\ - for(size_t i=0; i &m, bool negated) const + std::map &m, const typet &type, bool negated) const { exprt sum=nil_exprt(); mp_integer constants=0; typet index_type; if(m.empty()) - return nil_exprt(); + return from_integer(0, type); else index_type=m.begin()->first.type(); @@ -723,12 +1122,20 @@ exprt string_refinementt::sum_over_map( default: if(second>1) { - for(int i=0; isecond; i--) + if(sum.is_nil()) + sum=unary_minus_exprt(t); + else + sum=minus_exprt(sum, t); + for(int i=-1; i>second; i--) sum=minus_exprt(sum, t); } } @@ -755,7 +1162,7 @@ Function: string_refinementt::simplify_sum exprt string_refinementt::simplify_sum(const exprt &f) const { std::map map=map_representation_of_sum(f); - return sum_over_map(map); + return sum_over_map(map, f.type()); } /*******************************************************************\ @@ -800,7 +1207,7 @@ exprt string_refinementt::compute_inverse_function( } elems.erase(it); - return sum_over_map(elems, neg); + return sum_over_map(elems, f.type(), neg); } @@ -830,7 +1237,7 @@ Function: find_qvar Outputs: a Boolean - Purpose: looks for the symbol and return true if it is found + Purpose: look for the symbol and return true if it is found \*******************************************************************/ @@ -885,6 +1292,26 @@ Function: string_refinementt::initial_index_set and the upper bound minus one \*******************************************************************/ +void string_refinementt::add_to_index_set(const exprt &s, exprt i) +{ + simplify(i, ns); + if(i.id()==ID_constant) + { + mp_integer mpi; + to_integer(i, mpi); + if(mpi<0) + { + debug() << "add_to_index_set : ignoring negative number " << mpi << eom; + return; + } + } + if(index_set[s].insert(i).second) + { + debug() << "adding to index set of " << from_expr(s) + << ": " << from_expr(i) << eom; + current_index_set[s].insert(i); + } +} void string_refinementt::initial_index_set(const string_constraintt &axiom) { @@ -906,8 +1333,7 @@ void string_refinementt::initial_index_set(const string_constraintt &axiom) // if cur is of the form s[i] and no quantified variable appears in i if(!has_quant_var) { - current_index_set[s].insert(i); - index_set[s].insert(i); + add_to_index_set(s, i); } else { @@ -917,8 +1343,7 @@ void string_refinementt::initial_index_set(const string_constraintt &axiom) axiom.upper_bound(), from_integer(1, axiom.upper_bound().type())); replace_expr(qvar, kminus1, e); - current_index_set[s].insert(e); - index_set[s].insert(e); + add_to_index_set(s, e); } } else @@ -952,12 +1377,7 @@ void string_refinementt::update_index_set(const exprt &formula) const exprt &i=cur.op1(); assert(s.type().id()==ID_array); exprt simplified=simplify_sum(i); - if(index_set[s].insert(simplified).second) - { - debug() << "adding to index set of " << from_expr(s) - << ": " << from_expr(simplified) << eom; - current_index_set[s].insert(simplified); - } + add_to_index_set(s, simplified); } else { @@ -1014,19 +1434,18 @@ exprt find_index(const exprt &expr, const exprt &str) catch (exprt i) { return i; } } - /*******************************************************************\ Function: string_refinementt::instantiate - Inputs: an universaly quantified formula `axiom`, an array of char + Inputs: a universally quantified formula `axiom`, an array of char variable `str`, and an index expression `val`. - Outputs: substitute `qvar` the universaly quantified variable of `axiom`, by + Outputs: substitute `qvar` the universally quantified variable of `axiom`, by an index `val`, in `axiom`, so that the index used for `str` equals `val`. For instance, if `axiom` corresponds to - $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` would return - an expression for $s[v]='a' && t[v-x]='b'$. + $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` + would return an expression for $s[v]='a' && t[v-x]='b'$. \*******************************************************************/ @@ -1051,6 +1470,17 @@ exprt string_refinementt::instantiate( return implies_exprt(bounds, instance); } +/*******************************************************************\ + +Function: string_refinementt::instantiate_not_contains + + Inputs: a quantified formula representing `not_contains`, and a + list to which to add the created lemmas to + + Purpose: instantiate a quantified formula representing `not_contains` + by substituting the quantifiers and generating axioms + +\*******************************************************************/ void string_refinementt::instantiate_not_contains( const string_not_contains_constraintt &axiom, std::list &new_lemmas) @@ -1102,3 +1532,76 @@ void string_refinementt::instantiate_not_contains( new_lemmas.push_back(witness_bounds); } } + +/*******************************************************************\ + +Function: string_refinementt::substitute_array_lists() + + Inputs: an expression containing array-list expressions + + Outputs: an epression containing no array-list + + Purpose: replace array-lists by 'with' expressions + +\*******************************************************************/ + +exprt string_refinementt::substitute_array_lists(exprt expr) const +{ + for(size_t i=0; i=2); + typet &char_type=expr.operands()[1].type(); + array_typet arr_type(char_type, infinity_exprt(char_type)); + array_of_exprt new_arr(from_integer(0, char_type), + arr_type); + + with_exprt ret_expr(new_arr, + expr.operands()[0], + expr.operands()[1]); + + for(size_t i=2; isecond); + } + + ecopy=supert::get(ecopy); + + return substitute_array_lists(ecopy); +} diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index be0cd332bc3..252146f4620 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -14,6 +14,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #define CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H #include +#include #include #include @@ -27,30 +28,32 @@ Author: Alberto Griggio, alberto.griggio@gmail.com class string_refinementt: public bv_refinementt { public: - // refinement_bound is a bound on the number of refinement allowed string_refinementt( - const namespacet &_ns, propt &_prop, unsigned refinement_bound); + const namespacet &_ns, + propt &_prop, + unsigned refinement_bound); void set_mode(); // Should we use counter examples at each iteration? bool use_counter_example; - virtual std::string decision_procedure_text() const + bool do_concretizing; + + virtual std::string decision_procedure_text() const override { return "string refinement loop with "+prop.solver_text(); } static exprt is_positive(const exprt &x); + exprt get(const exprt &expr) const override; + protected: typedef std::set expr_sett; + typedef std::list exprt_listt; - virtual bvt convert_symbol(const exprt &expr); - virtual bvt convert_function_application( - const function_application_exprt &expr); - - decision_proceduret::resultt dec_solve(); + decision_proceduret::resultt dec_solve() override; bvt convert_bool_bv(const exprt &boole, const exprt &orig); @@ -60,6 +63,9 @@ class string_refinementt: public bv_refinementt unsigned initial_loop_bound; + // Is the current model correct + bool concrete_model; + string_constraint_generatort generator; // Simple constraints that have been given to the solver @@ -76,23 +82,42 @@ class string_refinementt: public bv_refinementt // Warning: this is indexed by array_expressions and not string expressions std::map current_index_set; std::map index_set; + replace_mapt symbol_resolve; + std::map reverse_symbol_resolve; + std::list> non_string_axioms; - void display_index_set(); + // Valuation in the current model of the symbols that have been created + // by the solver + replace_mapt current_model; - void add_lemma(const exprt &lemma, bool add_to_index_set=true); + void add_equivalence(const irep_idt & lhs, const exprt & rhs); - bool boolbv_set_equality_to_true(const equal_exprt &expr); + void display_index_set(); - literalt convert_rest(const exprt &expr); + void add_lemma(const exprt &lemma, + bool simplify=true, + bool add_to_index_set=true); - void add_instantiations(); + exprt substitute_function_applications(exprt expr); + typet substitute_java_string_types(typet type); + exprt substitute_java_strings(exprt expr); + void add_symbol_to_symbol_map(const exprt &lhs, const exprt &rhs); + bool is_char_array(const typet &type) const; + bool add_axioms_for_string_assigns(const exprt &lhs, const exprt &rhs); + void set_to(const exprt &expr, bool value) override; + void add_instantiations(); + void add_negation_of_constraint_to_solver( + const string_constraintt &axiom, supert &solver); + void fill_model(); bool check_axioms(); + void set_char_array_equality(const exprt &lhs, const exprt &rhs); void update_index_set(const exprt &formula); void update_index_set(const std::vector &cur); void initial_index_set(const string_constraintt &axiom); void initial_index_set(const std::vector &string_axioms); + void add_to_index_set(const exprt &s, exprt i); exprt instantiate( const string_constraintt &axiom, const exprt &str, const exprt &val); @@ -101,17 +126,26 @@ class string_refinementt: public bv_refinementt const string_not_contains_constraintt &axiom, std::list &new_lemmas); + exprt substitute_array_lists(exprt) const; + exprt compute_inverse_function( const exprt &qvar, const exprt &val, const exprt &f); std::map map_representation_of_sum(const exprt &f) const; - exprt sum_over_map(std::map &m, bool negated=false) const; + exprt sum_over_map( + std::map &m, const typet &type, bool negated=false) const; exprt simplify_sum(const exprt &f) const; - exprt get_array(const exprt &arr, const exprt &size); + void concretize_results(); + void concretize_lengths(); + // Length of char arrays found during concretization + std::map found_length; + + exprt get_array(const exprt &arr, const exprt &size) const; + exprt get_array(const exprt &arr) const; - std::string string_of_array(const exprt &arr, const exprt &size) const; + std::string string_of_array(const array_exprt &arr); }; #endif From 6958a561df0a601687cdee2230d4df4f8f3a6176 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 22 Mar 2017 15:05:40 +0000 Subject: [PATCH 5/5] Need to assign length before calls since it can be overwritten by functions with side-effect --- src/goto-programs/string_refine_preprocess.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index 8403d4bf044..7243f66049a 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -361,7 +361,16 @@ exprt string_refine_preprocesst::make_cprover_string_assign( std::list assignments; // 1) cprover_string_length= *(rhs->length) + symbolt sym_length=get_fresh_aux_symbol( + length_type, + "cprover_string_length", + "cprover_string_length", + location, + ID_java, + symbol_table); + symbol_exprt cprover_length=sym_length.symbol_expr(); member_exprt length(deref, "length", length_type); + assignments.emplace_back(cprover_length, length); // 2) cprover_string_array = *(rhs->data) symbol_exprt array_lhs=fresh_array(data_type.subtype(), location); @@ -373,7 +382,7 @@ exprt string_refine_preprocesst::make_cprover_string_assign( // This assignment is useful for finding witnessing strings for counter // examples refined_string_typet ref_type(length_type, java_char_type()); - string_exprt new_rhs(length, array_lhs, ref_type); + string_exprt new_rhs(cprover_length, array_lhs, ref_type); symbol_exprt lhs=fresh_string(new_rhs.type(), location); assignments.emplace_back(lhs, new_rhs);