From c6c1b3f1ab9503d770d76a333544534eef034d40 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Jan 2018 13:35:04 +0000 Subject: [PATCH 1/8] Add an optional guard to add_axioms_for_constant This allows to add a condition under which we want the result to be equal to the given constant. By default this is true, meaning that the result should always equal the constant. --- src/solvers/refinement/string_constraint_generator.h | 6 +++++- .../refinement/string_constraint_generator_constants.cpp | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 1f00a0bbe7e..9cf768fd12e 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -164,7 +164,11 @@ class string_constraint_generatort final const exprt &end_index); exprt add_axioms_for_concat(const function_application_exprt &f); exprt add_axioms_for_concat_code_point(const function_application_exprt &f); - exprt add_axioms_for_constant(const array_string_exprt &res, irep_idt sval); + exprt add_axioms_for_constant( + const array_string_exprt &res, + irep_idt sval, + const exprt &guard = true_exprt()); + exprt add_axioms_for_delete( const array_string_exprt &res, const array_string_exprt &str, diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index d5d6d4eeb0c..8acba327066 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -19,10 +19,12 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// equal. /// \param res: array of characters for the result /// \param sval: a string constant +/// \param guard: condition under which the axiom should apply, true by default /// \return integer expression equal to zero exprt string_constraint_generatort::add_axioms_for_constant( const array_string_exprt &res, - irep_idt sval) + irep_idt sval, + const exprt &guard) { const typet &index_type = res.length().type(); const typet &char_type = res.content().type().subtype(); @@ -43,12 +45,12 @@ exprt string_constraint_generatort::add_axioms_for_constant( const exprt idx = from_integer(i, index_type); const exprt c = from_integer(str[i], char_type); const equal_exprt lemma(res[idx], c); - axioms.push_back(lemma); + axioms.push_back(implies_exprt(guard, lemma)); } const exprt s_length = from_integer(str.size(), index_type); - axioms.push_back(res.axiom_for_has_length(s_length)); + axioms.push_back(implies_exprt(guard, equal_exprt(res.length(), s_length))); return from_integer(0, get_return_code_type()); } From 74144fcb136dec864eb8c3a8bbc7ddabeb05876f Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Jan 2018 13:41:11 +0000 Subject: [PATCH 2/8] Handle if_exprt in add_axioms_for_string_literal cprover_string_literal was requiring a string constant as argument, but we now deal with if expressions as well. So expressions like: `cprover_string_literal_func!0#0(cprover_string_length$3!0@1#2, nondet_infinite_array$5!0@1, this!0@1#1 == &tmp_object_factory$17!0 ? tmp_object_factory$17!0#4.@class_identifier : this$object#0.@class_identifier)` are correctly handled by the string solver. --- .../refinement/string_constraint_generator.h | 5 +++ .../string_constraint_generator_constants.cpp | 37 +++++++++++++++++-- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 9cf768fd12e..d2c76eb6faa 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -199,6 +199,11 @@ class string_constraint_generatort final exprt add_axioms_for_insert_char(const function_application_exprt &f); exprt add_axioms_for_insert_float(const function_application_exprt &f); exprt add_axioms_for_insert_double(const function_application_exprt &f); + + exprt add_axioms_for_cprover_string( + const array_string_exprt &res, + const exprt &arg, + const exprt &guard); exprt add_axioms_from_literal(const function_application_exprt &f); exprt add_axioms_from_int(const function_application_exprt &f); exprt add_axioms_from_int( diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index 8acba327066..fad3a2d9e3d 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -67,6 +67,39 @@ exprt string_constraint_generatort::add_axioms_for_empty_string( return from_integer(0, get_return_code_type()); } +/// Convert an expression of type string_typet to a string_exprt +/// \param res: string expression for the result +/// \param arg: expression of type string typet +/// \param guard: condition under which `res` should be equal to arg +/// \return 0 if constraints were added, 1 if expression could not be handled +/// and no constraint was added. Expression we can handle are of the +/// form \f$ e := "" | (expr)? e : e \f$ +exprt string_constraint_generatort::add_axioms_for_cprover_string( + const array_string_exprt &res, + const exprt &arg, + const exprt &guard) +{ + if(const auto if_expr = expr_try_dynamic_cast(arg)) + { + const and_exprt guard_true(guard, if_expr->cond()); + const exprt return_code_true = + add_axioms_for_cprover_string(res, if_expr->true_case(), guard_true); + + const and_exprt guard_false(guard, not_exprt(if_expr->cond())); + const exprt return_code_false = + add_axioms_for_cprover_string(res, if_expr->false_case(), guard_false); + + return if_exprt( + equal_exprt(return_code_true, from_integer(0, get_return_code_type())), + return_code_false, + return_code_true); + } + else if(const auto constant_expr = expr_try_dynamic_cast(arg)) + return add_axioms_for_constant(res, constant_expr->get_value(), guard); + else + return from_integer(1, get_return_code_type()); +} + /// String corresponding to an internal cprover string /// /// Add axioms ensuring that the returned string expression is equal to the @@ -81,7 +114,5 @@ exprt string_constraint_generatort::add_axioms_from_literal( const function_application_exprt::argumentst &args=f.arguments(); PRECONDITION(args.size() == 3); // Bad args to string literal? const array_string_exprt res = char_array_of_pointer(args[1], args[0]); - const exprt &arg = args[2]; - irep_idt sval=to_constant_expr(arg).get_value(); - return add_axioms_for_constant(res, sval); + return add_axioms_for_cprover_string(res, args[2], true_exprt()); } From 4699c13d5ede7312234be69137f6c53f5aeffa03 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 8 Jan 2018 08:02:19 +0000 Subject: [PATCH 3/8] Extend symbol resolution to string_typet We take into account expressions of type string_typet in the symbol_resolve object, so that these values can be propagated. This can be particularly usefull for getting class identifiers as strings, since these are represented as expression type string_typet in the goto program. This also adds a has_subtype function with predicate as arg to generalize what has_char_pointer_subtype and has_string_subtype are doing. --- src/solvers/refinement/string_refinement.cpp | 117 ++++++++++++++++++- src/solvers/refinement/string_refinement.h | 4 + 2 files changed, 117 insertions(+), 4 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index fec692c9bf9..a84cb80a249 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -293,7 +293,7 @@ bool is_char_type(const typet &type) /// Distinguish char array from other types. /// For now, any unsigned bitvector type is considered a character. /// \param type: a type -/// \param ns: name space +/// \param ns: namespace /// \return true if the given type is an array of characters bool is_char_array_type(const typet &type, const namespacet &ns) { @@ -311,8 +311,46 @@ bool is_char_pointer_type(const typet &type) } /// \param type: a type -/// \param ns: name space -/// \return true if a subtype is an pointer of characters +/// \param pred: a predicate +/// \return true if one of the subtype of `type` satisfies predicate `pred`. +/// The meaning of "subtype" is in the algebraic datatype sense: +/// for example, the subtypes of a struct are the types of its +/// components, the subtype of a pointer is the type it points to, +/// etc... +/// For instance in the type `t` defined by +/// `{ int a; char[] b; double * c; { bool d} e}`, `int`, `char`, +/// `double` and `bool` are subtypes of `t`. +bool has_subtype( + const typet &type, + const std::function &pred) +{ + if(pred(type)) + return true; + + if(type.id() == ID_struct || type.id() == ID_union) + { + const struct_union_typet &struct_type = to_struct_union_type(type); + return std::any_of( + struct_type.components().begin(), + struct_type.components().end(), // NOLINTNEXTLINE + [&](const struct_union_typet::componentt &comp) { + return has_subtype(comp.type(), pred); + }); + } + + return std::any_of( // NOLINTNEXTLINE + type.subtypes().begin(), type.subtypes().end(), [&](const typet &t) { + return has_subtype(t, pred); + }); +} + +/// \param type: a type +/// \param ns: namespace +/// \return true if a subtype of `type` is an pointer of characters. +/// The meaning of "subtype" is in the algebraic datatype sense: +/// for example, the subtypes of a struct are the types of its +/// components, the subtype of a pointer is the type it points to, +/// etc... static bool has_char_pointer_subtype(const typet &type, const namespacet &ns) { if(is_char_pointer_type(type)) @@ -336,8 +374,21 @@ static bool has_char_pointer_subtype(const typet &type, const namespacet &ns) return false; } +/// \param type: a type +/// \return true if a subtype of `type` is string_typet. +/// The meaning of "subtype" is in the algebraic datatype sense: +/// for example, the subtypes of a struct are the types of its +/// components, the subtype of a pointer is the type it points to, +/// etc... +static bool has_string_subtype(const typet &type) +{ + // NOLINTNEXTLINE + return has_subtype( + type, [](const typet &subtype) { return subtype == string_typet(); }); +} + /// \param expr: an expression -/// \param ns: name space +/// \param ns: namespace /// \return true if a subexpression of `expr` is an array of characters static bool has_char_array_subexpr(const exprt &expr, const namespacet &ns) { @@ -447,6 +498,53 @@ static union_find_replacet generate_symbol_resolution_from_equations( return solver; } +/// Symbol resolution for expressions of type string typet +/// \param equations: list of equations +/// \param ns: namespace +/// \param stream: output stream +/// \return union_find_replacet structure containing the correspondences. +static union_find_replacet string_identifiers_resolution_from_equations( + std::vector &equations, + const namespacet &ns, + messaget::mstreamt &stream) +{ + const auto eom = messaget::eom; + const std::string log_message = + "WARNING string_refinement.cpp " + "string_identifiers_resolution_from_equations:"; + + union_find_replacet result; + for(const equal_exprt &eq : equations) + { + if(eq.rhs().type() == string_typet()) + result.make_union(eq.lhs(), eq.rhs()); + else if(has_string_subtype(eq.lhs().type())) + { + if(eq.rhs().type().id() == ID_struct) + { + const struct_typet &struct_type = to_struct_type(eq.rhs().type()); + for(const auto &comp : struct_type.components()) + { + if(comp.type() == string_typet()) + { + const member_exprt lhs_data(eq.lhs(), comp.get_name(), comp.type()); + const exprt rhs_data = simplify_expr( + member_exprt(eq.rhs(), comp.get_name(), comp.type()), ns); + result.make_union(lhs_data, rhs_data); + } + } + } + else + { + stream << log_message << "non struct with string subexpr " + << from_expr(ns, "", eq.rhs()) << "\n * of type " + << from_type(ns, "", eq.rhs().type()) << eom; + } + } + } + return result; +} + void output_equations( std::ostream &output, const std::vector &equations, @@ -539,8 +637,19 @@ decision_proceduret::resultt string_refinementt::dec_solve() << from_expr(ns, "", pair.second) << eom; #endif + const union_find_replacet string_id_symbol_resolve = + string_identifiers_resolution_from_equations(equations, ns, debug()); + debug() << "dec_solve: Replacing char pointer symbols" << eom; replace_symbols_in_equations(symbol_resolve, equations); + + debug() << "dec_solve: Replacing string ids in function applications" << eom; + for(equal_exprt &eq : equations) + { + if(can_cast_expr(eq.rhs())) + string_id_symbol_resolve.replace_expr(eq.rhs()); + } + #ifdef DEBUG output_equations(debug(), equations, ns); #endif diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index b890f9e40d5..53515b51685 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -112,4 +112,8 @@ exprt concretize_arrays_in_expression( bool is_char_array_type(const typet &type, const namespacet &ns); +bool has_subtype( + const typet &type, + const std::function &pred); + #endif From 3dd38773b31f1b2f68962aa0d717cd3b3039d031 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 8 Jan 2018 08:25:46 +0000 Subject: [PATCH 4/8] Refactor has_char_pointer_subtype with has_subtype We reuse has_subtype to simplify the function. Also get rid of the namespace argument that was not used. --- src/solvers/refinement/string_refinement.cpp | 25 +++----------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index a84cb80a249..b70581640e3 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -345,33 +345,14 @@ bool has_subtype( } /// \param type: a type -/// \param ns: namespace /// \return true if a subtype of `type` is an pointer of characters. /// The meaning of "subtype" is in the algebraic datatype sense: /// for example, the subtypes of a struct are the types of its /// components, the subtype of a pointer is the type it points to, /// etc... -static bool has_char_pointer_subtype(const typet &type, const namespacet &ns) +static bool has_char_pointer_subtype(const typet &type) { - if(is_char_pointer_type(type)) - return true; - - if(type.id() == ID_struct || type.id() == ID_union) - { - const struct_union_typet &struct_type = to_struct_union_type(type); - for(const auto &comp : struct_type.components()) - { - if(has_char_pointer_subtype(comp.type(), ns)) - return true; - } - } - - for(const auto &t : type.subtypes()) - { - if(has_char_pointer_subtype(t, ns)) - return true; - } - return false; + return has_subtype(type, is_char_pointer_type); } /// \param type: a type @@ -471,7 +452,7 @@ static union_find_replacet generate_symbol_resolution_from_equations( // function applications can be ignored because they will be replaced // in the convert_function_application step of dec_solve } - else if(has_char_pointer_subtype(lhs.type(), ns)) + else if(has_char_pointer_subtype(lhs.type())) { if(rhs.type().id() == ID_struct) { From 954060ef49cbbfb20a0c5b0aec872cc8a5646844 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 8 Jan 2018 09:22:48 +0000 Subject: [PATCH 5/8] Add unit test for has_subtype --- unit/Makefile | 1 + .../string_refinement/has_subtype.cpp | 61 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 unit/solvers/refinement/string_refinement/has_subtype.cpp diff --git a/unit/Makefile b/unit/Makefile index c8f7b1b5613..ab790bc730d 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -31,6 +31,7 @@ SRC += unit_tests.cpp \ solvers/refinement/string_constraint_generator_valueof/is_digit_with_radix.cpp \ solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp \ solvers/refinement/string_refinement/concretize_array.cpp \ + solvers/refinement/string_refinement/has_subtype.cpp \ solvers/refinement/string_refinement/substitute_array_list.cpp \ solvers/refinement/string_refinement/union_find_replace.cpp \ util/expr_cast/expr_cast.cpp \ diff --git a/unit/solvers/refinement/string_refinement/has_subtype.cpp b/unit/solvers/refinement/string_refinement/has_subtype.cpp new file mode 100644 index 00000000000..6b0e419e8d2 --- /dev/null +++ b/unit/solvers/refinement/string_refinement/has_subtype.cpp @@ -0,0 +1,61 @@ +/*******************************************************************\ + + Module: Unit tests for has_subtype in + solvers/refinement/string_refinement.cpp + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#include + +#include +#include +#include + +// Curryfied version of type comparison. +// Useful for the predicate argument of has_subtype +static std::function is_type(const typet &t1) +{ + auto f = [&](const typet &t2) { return t1 == t2; }; + return f; +} + +SCENARIO("has_subtype", "[core][solvers][refinement][string_refinement]") +{ + const typet char_type = java_char_type(); + const typet int_type = java_int_type(); + const typet bool_type = java_boolean_type(); + + REQUIRE(has_subtype(char_type, is_type(char_type))); + REQUIRE_FALSE(has_subtype(char_type, is_type(int_type))); + + GIVEN("a struct with char and int fields") + { + struct_typet struct_type; + struct_type.components().emplace_back("char_field", char_type); + struct_type.components().emplace_back("int_field", int_type); + THEN("char and int are subtypes") + { + REQUIRE(has_subtype(struct_type, is_type(char_type))); + REQUIRE(has_subtype(struct_type, is_type(int_type))); + } + THEN("bool is not a subtype") + { + REQUIRE_FALSE(has_subtype(struct_type, is_type(bool_type))); + } + } + + GIVEN("a pointer to char") + { + pointer_typet ptr_type = pointer_type(char_type); + THEN("char is a subtype") + { + REQUIRE(has_subtype(ptr_type, is_type(char_type))); + } + THEN("int is not a subtype") + { + REQUIRE_FALSE(has_subtype(ptr_type, is_type(int_type))); + } + } +} From ae4deff6d170a4e31ac34aa24970930691948b0c Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 11 Jan 2018 12:21:19 +0000 Subject: [PATCH 6/8] Debug information for string equations We number equations to make it easier to debug function manipulating equations, in particular string_identifiers_resolution_from_equations. We also output symbol resolution information for strings --- src/solvers/refinement/string_refinement.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index b70581640e3..e4189f137c9 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -531,9 +531,9 @@ void output_equations( const std::vector &equations, const namespacet &ns) { - for(const auto &eq : equations) - output << " * " << from_expr(ns, "", eq.lhs()) - << " == " << from_expr(ns, "", eq.rhs()) << std::endl; + for(std::size_t i = 0; i < equations.size(); ++i) + output << " [" << i << "] " << from_expr(ns, "", equations[i].lhs()) + << " == " << from_expr(ns, "", equations[i].rhs()) << std::endl; } /// Main decision procedure of the solver. Looks for a valuation of variables @@ -620,6 +620,14 @@ decision_proceduret::resultt string_refinementt::dec_solve() const union_find_replacet string_id_symbol_resolve = string_identifiers_resolution_from_equations(equations, ns, debug()); +#ifdef DEBUG + debug() << "symbol resolve string:" << eom; + for(const auto &pair : string_id_symbol_resolve.to_vector()) + { + debug() << from_expr(ns, "", pair.first) << " --> " + << from_expr(ns, "", pair.second) << eom; + } +#endif debug() << "dec_solve: Replacing char pointer symbols" << eom; replace_symbols_in_equations(symbol_resolve, equations); From c13e6029335fffb1b9ecff76f01a8de5339e5198 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 10 Jan 2018 17:53:30 +0000 Subject: [PATCH 7/8] Adding only needed equations in symbol resolution For cprover strings, adding all the equations to the union_find_replace structure could sometimes become counter performant. Instead here we look at what string expression are need for the function calls and in which equations they appear. --- src/solvers/refinement/string_refinement.cpp | 191 ++++++++++++++++--- src/solvers/refinement/string_refinement.h | 6 + 2 files changed, 173 insertions(+), 24 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index e4189f137c9..3e3ed3a9479 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -27,6 +27,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include #include +#include static exprt substitute_array_with_expr(const exprt &expr, const exprt &index); @@ -452,7 +453,8 @@ static union_find_replacet generate_symbol_resolution_from_equations( // function applications can be ignored because they will be replaced // in the convert_function_application step of dec_solve } - else if(has_char_pointer_subtype(lhs.type())) + else if(lhs.type().id() != ID_pointer && + has_char_pointer_subtype(lhs.type())) { if(rhs.type().id() == ID_struct) { @@ -479,12 +481,125 @@ static union_find_replacet generate_symbol_resolution_from_equations( return solver; } -/// Symbol resolution for expressions of type string typet +/// Maps equation to expressions contained in them and conversely expressions to +/// equations that contain them. This can be used on a subset of expressions +/// which interests us, in particular strings. Equations are identified by an +/// index of type `std::size_t` for more efficient insertion and lookup. +class equation_symbol_mappingt +{ +public: + // Record index of the equations that contain a given expression + std::map> equations_containing; + // Record expressions that are contained in the equation with the given index + std::unordered_map> strings_in_equation; + + void add(const std::size_t i, const exprt &expr) + { + equations_containing[expr].push_back(i); + strings_in_equation[i].push_back(expr); + } + + std::vector find_expressions(const std::size_t i) + { + return strings_in_equation[i]; + } + + std::vector find_equations(const exprt &expr) + { + return equations_containing[expr]; + } +}; + +/// This is meant to be used on the lhs of an equation with string subtype. +/// \param lhs: expression which is either of string type, or a symbol +/// representing a struct with some string members +/// \return if lhs is a string return this string, if it is a struct return the +/// members of the expression that have string type. +static std::vector extract_strings_from_lhs(const exprt &lhs) +{ + std::vector result; + if(lhs.type() == string_typet()) + result.push_back(lhs); + else if(lhs.type().id() == ID_struct) + { + const struct_typet &struct_type = to_struct_type(lhs.type()); + for(const auto &comp : struct_type.components()) + { + const std::vector strings_in_comp = extract_strings_from_lhs( + member_exprt(lhs, comp.get_name(), comp.type())); + result.insert( + result.end(), strings_in_comp.begin(), strings_in_comp.end()); + } + } + return result; +} + +/// \param expr: an expression +/// \return all subexpressions of type string which are not if_exprt expressions +/// this includes expressions of the form `e.x` if e is a symbol subexpression +/// with a field `x` of type string +static std::vector extract_strings(const exprt &expr) +{ + std::vector result; + for(auto it = expr.depth_begin(); it != expr.depth_end();) + { + if(it->type() == string_typet() && it->id() != ID_if) + { + result.push_back(*it); + it.next_sibling_or_parent(); + } + else if(it->id() == ID_symbol) + { + for(const exprt &e : extract_strings_from_lhs(*it)) + result.push_back(e); + it.next_sibling_or_parent(); + } + else + ++it; + } + return result; +} + +/// Given an equation on strings, mark these strings as belonging to the same +/// set in the `symbol_resolve` structure. The lhs and rhs of the equation, +/// should have string type or be struct with string members. +/// \param eq: equation to add +/// \param symbol_resolve: structure to which the equation will be added +/// \param ns: namespace +static void add_string_equation_to_symbol_resolution( + const equal_exprt &eq, + union_find_replacet &symbol_resolve, + const namespacet &ns) +{ + if(eq.rhs().type() == string_typet()) + { + symbol_resolve.make_union(eq.lhs(), eq.rhs()); + } + else if(has_string_subtype(eq.lhs().type())) + { + if(eq.rhs().type().id() == ID_struct) + { + const struct_typet &struct_type = to_struct_type(eq.rhs().type()); + for(const auto &comp : struct_type.components()) + { + const member_exprt lhs_data(eq.lhs(), comp.get_name(), comp.type()); + const exprt rhs_data = simplify_expr( + member_exprt(eq.rhs(), comp.get_name(), comp.type()), ns); + add_string_equation_to_symbol_resolution( + equal_exprt(lhs_data, rhs_data), symbol_resolve, ns); + } + } + } +} + +/// Symbol resolution for expressions of type string typet. +/// We record equality between these expressions in the output if one of the +/// function calls depends on them. /// \param equations: list of equations /// \param ns: namespace /// \param stream: output stream /// \return union_find_replacet structure containing the correspondences. -static union_find_replacet string_identifiers_resolution_from_equations( +union_find_replacet string_identifiers_resolution_from_equations( std::vector &equations, const namespacet &ns, messaget::mstreamt &stream) @@ -494,35 +609,63 @@ static union_find_replacet string_identifiers_resolution_from_equations( "WARNING string_refinement.cpp " "string_identifiers_resolution_from_equations:"; - union_find_replacet result; - for(const equal_exprt &eq : equations) + equation_symbol_mappingt equation_map; + + // Indexes of equations that need to be added to the result + std::unordered_set required_equations; + std::stack equations_to_treat; + + for(std::size_t i = 0; i < equations.size(); ++i) { - if(eq.rhs().type() == string_typet()) - result.make_union(eq.lhs(), eq.rhs()); - else if(has_string_subtype(eq.lhs().type())) + const equal_exprt &eq = equations[i]; + if(eq.rhs().id() == ID_function_application) + { + if(required_equations.insert(i).second) + equations_to_treat.push(i); + + std::vector rhs_strings = extract_strings(eq.rhs()); + for(const auto expr : rhs_strings) + equation_map.add(i, expr); + } + else if(eq.lhs().type().id() != ID_pointer && + has_string_subtype(eq.lhs().type())) { - if(eq.rhs().type().id() == ID_struct) + std::vector lhs_strings = extract_strings_from_lhs(eq.lhs()); + + for(const auto expr : lhs_strings) + equation_map.add(i, expr); + + if(lhs_strings.empty()) { - const struct_typet &struct_type = to_struct_type(eq.rhs().type()); - for(const auto &comp : struct_type.components()) - { - if(comp.type() == string_typet()) - { - const member_exprt lhs_data(eq.lhs(), comp.get_name(), comp.type()); - const exprt rhs_data = simplify_expr( - member_exprt(eq.rhs(), comp.get_name(), comp.type()), ns); - result.make_union(lhs_data, rhs_data); - } - } + stream << log_message << "non struct with string subtype " + << from_expr(ns, "", eq.lhs()) << "\n * of type " + << from_type(ns, "", eq.lhs().type()) << eom; } - else + + for(const exprt &expr : extract_strings(eq.rhs())) + equation_map.add(i, expr); + } + } + + // transitively add all equations which depend on the equations to treat + while(!equations_to_treat.empty()) + { + const std::size_t i = equations_to_treat.top(); + equations_to_treat.pop(); + for(const exprt &string : equation_map.find_expressions(i)) + { + for(const std::size_t j : equation_map.find_equations(string)) { - stream << log_message << "non struct with string subexpr " - << from_expr(ns, "", eq.rhs()) << "\n * of type " - << from_type(ns, "", eq.rhs().type()) << eom; + if(required_equations.insert(j).second) + equations_to_treat.push(j); } } } + + union_find_replacet result; + for(const std::size_t i : required_equations) + add_string_equation_to_symbol_resolution(equations[i], result, ns); + return result; } diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index 53515b51685..8e4a3432703 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -116,4 +116,10 @@ bool has_subtype( const typet &type, const std::function &pred); +// Declaration required for unit-test: +union_find_replacet string_identifiers_resolution_from_equations( + std::vector &equations, + const namespacet &ns, + messaget::mstreamt &stream); + #endif From 23c356112e5539b3262685f49ec798db9f9d5b00 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 11 Jan 2018 12:18:38 +0000 Subject: [PATCH 8/8] Unit test for string symbol resolution --- unit/Makefile | 1 + .../string_symbol_resolution.cpp | 101 ++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 unit/solvers/refinement/string_refinement/string_symbol_resolution.cpp diff --git a/unit/Makefile b/unit/Makefile index ab790bc730d..752300fc0c4 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -33,6 +33,7 @@ SRC += unit_tests.cpp \ solvers/refinement/string_refinement/concretize_array.cpp \ solvers/refinement/string_refinement/has_subtype.cpp \ solvers/refinement/string_refinement/substitute_array_list.cpp \ + solvers/refinement/string_refinement/string_symbol_resolution.cpp \ solvers/refinement/string_refinement/union_find_replace.cpp \ util/expr_cast/expr_cast.cpp \ util/expr_iterator.cpp \ diff --git a/unit/solvers/refinement/string_refinement/string_symbol_resolution.cpp b/unit/solvers/refinement/string_refinement/string_symbol_resolution.cpp new file mode 100644 index 00000000000..076b2a5bbf2 --- /dev/null +++ b/unit/solvers/refinement/string_refinement/string_symbol_resolution.cpp @@ -0,0 +1,101 @@ +/*******************************************************************\ + + Module: Unit tests for string_identifiers_resolution_from_equations in + solvers/refinement/string_refinement.cpp + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +SCENARIO("string_identifiers_resolution_from_equations", +"[core][solvers][refinement][string_refinement]") +{ + // For printing expression + register_language(new_java_bytecode_language); + symbol_tablet symbol_table; + namespacet ns(symbol_table); + messaget::mstreamt &stream = messaget().debug(); + + GIVEN("Some equations") + { + constant_exprt a("a", string_typet()); + constant_exprt b("b", string_typet()); + constant_exprt c("c", string_typet()); + constant_exprt d("d", string_typet()); + constant_exprt e("e", string_typet()); + constant_exprt f("f", string_typet()); + + struct_typet struct_type; + struct_type.components().emplace_back("str1", string_typet()); + struct_type.components().emplace_back("str2", string_typet()); + struct_exprt struct_expr(struct_type); + struct_expr.operands().push_back(a); + struct_expr.operands().push_back(f); + symbol_exprt symbol_struct("sym_struct", struct_type); + + std::vector equations; + equations.emplace_back(a, b); + equations.emplace_back(b, c); + equations.emplace_back(d, e); + equations.emplace_back(symbol_struct, struct_expr); + + WHEN("There is no function call") + { + union_find_replacet symbol_resolve = + string_identifiers_resolution_from_equations( + equations, ns, stream); + + THEN("The symbol resolution structure is empty") + { + REQUIRE(symbol_resolve.to_vector().empty()); + } + } + + WHEN("There is a function call") + { + symbol_exprt fun_sym("f", code_typet()); + function_application_exprt fun(fun_sym, bool_typet()); + fun.operands().push_back(c); + symbol_exprt bool_sym("bool_b", bool_typet()); + equations.emplace_back(bool_sym, fun); + union_find_replacet symbol_resolve = + string_identifiers_resolution_from_equations( + equations, ns, stream); + + THEN("Equations that depend on it should be added") + { + REQUIRE(symbol_resolve.find(b) == symbol_resolve.find(c)); + REQUIRE(symbol_resolve.find(a) == symbol_resolve.find(b)); + + member_exprt sym_m1(symbol_struct, "str1", string_typet()); + member_exprt sym_m2(symbol_struct, "str2", string_typet()); + + REQUIRE(symbol_resolve.find(sym_m1) == symbol_resolve.find(c)); + REQUIRE(symbol_resolve.find(sym_m2) == symbol_resolve.find(f)); + } + + THEN("Equations that do not depend on it should not be added") + { + REQUIRE(symbol_resolve.find(d) != symbol_resolve.find(e)); + } + + THEN("No other equation is added") + { + REQUIRE(symbol_resolve.find(a) != symbol_resolve.find(d)); + REQUIRE(symbol_resolve.find(a) != symbol_resolve.find(f)); + REQUIRE(symbol_resolve.find(d) != symbol_resolve.find(f)); + } + } + } +}