From a914153c75c3b8f76124cbc60df26021741cfc85 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 9 Apr 2018 07:38:53 +0100 Subject: [PATCH 01/25] Use map instead of vector for sparse array entries This avoid having to explicitly sort the entries, and make looking up for an element more efficient: logarithmic instead of linear. --- src/solvers/refinement/string_refinement_util.cpp | 14 +------------- src/solvers/refinement/string_refinement_util.h | 10 ++++++++-- .../refinement/string_refinement/sparse_array.cpp | 14 ++++++-------- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index 0175984f4b5..ebe757bf637 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -97,7 +97,7 @@ sparse_arrayt::sparse_arrayt(const with_exprt &expr) { const auto &with_expr = expr_dynamic_cast(ref.get()); const auto current_index = numeric_cast_v(with_expr.where()); - entries.emplace_back(current_index, with_expr.new_value()); + entries[current_index] = with_expr.new_value(); ref = with_expr.old(); } @@ -123,18 +123,6 @@ exprt sparse_arrayt::to_if_expression(const exprt &index) const }); } -interval_sparse_arrayt::interval_sparse_arrayt(const with_exprt &expr) - : sparse_arrayt(expr) -{ - // Entries are sorted so that successive entries correspond to intervals - std::sort( - entries.begin(), - entries.end(), - []( - const std::pair &a, - const std::pair &b) { return a.first < b.first; }); -} - exprt interval_sparse_arrayt::to_if_expression(const exprt &index) const { return std::accumulate( diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h index e8f7b615e8f..00079928f87 100644 --- a/src/solvers/refinement/string_refinement_util.h +++ b/src/solvers/refinement/string_refinement_util.h @@ -98,7 +98,10 @@ class sparse_arrayt protected: exprt default_value; - std::vector> entries; + std::map entries; + explicit sparse_arrayt(exprt default_value) : default_value(default_value) + { + } }; /// Represents arrays by the indexes up to which the value remains the same. @@ -112,7 +115,10 @@ class interval_sparse_arrayt final : public sparse_arrayt /// converted to an array `arr` where for all index `k` smaller than `i`, /// `arr[k]` is `a`, for all index between `i` (exclusive) and `j` it is `b` /// and for the others it is `x`. - explicit interval_sparse_arrayt(const with_exprt &expr); + explicit interval_sparse_arrayt(const with_exprt &expr) : sparse_arrayt(expr) + { + } + exprt to_if_expression(const exprt &index) const override; }; diff --git a/unit/solvers/refinement/string_refinement/sparse_array.cpp b/unit/solvers/refinement/string_refinement/sparse_array.cpp index 9534d791466..d623dc37abc 100644 --- a/unit/solvers/refinement/string_refinement/sparse_array.cpp +++ b/unit/solvers/refinement/string_refinement/sparse_array.cpp @@ -46,19 +46,17 @@ SCENARIO("sparse_array", "[core][solvers][refinement][string_refinement]") WHEN("It is converted to a sparse array") { const sparse_arrayt sparse_array(input_expr); - THEN("The resulting if expression is index=4?x:index=1?y:index=100?z:0") + THEN("The resulting if expression is index=100?z:index=4?x:index=1?y:0") { const symbol_exprt index("index", int_type); const if_exprt expected( - equal_exprt(index, index4), - charx, + equal_exprt(index, index100), + charz, if_exprt( - equal_exprt(index, index1), - chary, + equal_exprt(index, index4), + charx, if_exprt( - equal_exprt(index, index100), - charz, - from_integer(0, char_type)))); + equal_exprt(index, index1), chary, from_integer(0, char_type)))); REQUIRE(sparse_array.to_if_expression(index) == expected); } } From d483c81bf01eb635e51b75fdc480f69e58b68632 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 30 Mar 2018 10:37:11 +0100 Subject: [PATCH 02/25] Initialize sparse array from array_exprt --- .../refinement/string_refinement_util.cpp | 18 ++++++++++++++++++ .../refinement/string_refinement_util.h | 5 +++++ 2 files changed, 23 insertions(+) diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index ebe757bf637..f63ad2b9b0b 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -140,6 +140,24 @@ exprt interval_sparse_arrayt::to_if_expression(const exprt &index) const }); } +interval_sparse_arrayt::interval_sparse_arrayt( + const array_exprt &expr, + const exprt &extra_value) + : sparse_arrayt(extra_value) +{ + const auto &operands = expr.operands(); + exprt last_added = extra_value; + for(std::size_t i = 0; i < operands.size(); ++i) + { + const std::size_t index = operands.size() - 1 - i; + if(operands[index].id() != ID_unknown && operands[index] != last_added) + { + entries[index] = operands[index]; + last_added = operands[index]; + } + } +} + void equation_symbol_mappingt::add(const std::size_t i, const exprt &expr) { equations_containing[expr].push_back(i); diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h index 00079928f87..fda1d08ac0b 100644 --- a/src/solvers/refinement/string_refinement_util.h +++ b/src/solvers/refinement/string_refinement_util.h @@ -119,6 +119,11 @@ class interval_sparse_arrayt final : public sparse_arrayt { } + /// Initialize an array expression to sparse array representation, avoiding + /// repetition of identical successive values and setting the default to + /// `extra_value`. + interval_sparse_arrayt(const array_exprt &expr, const exprt &extra_value); + exprt to_if_expression(const exprt &index) const override; }; From ede2fa1a4e4028686e294466ba9845f6e917bd9c Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 5 Apr 2018 16:45:50 +0100 Subject: [PATCH 03/25] Clear string_dependencies in calls to dec_solve If the content is kept between calls to dec_solve, some nodes can be duplicated in the graph, which leads to more constraints added and decreased performance. --- src/solvers/refinement/string_refinement.cpp | 18 +++++++++++------- .../refinement/string_refinement_util.cpp | 8 ++++++++ .../refinement/string_refinement_util.h | 3 +++ 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 8cec89c46fb..fed05e00478 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -648,20 +648,24 @@ decision_proceduret::resultt string_refinementt::dec_solve() output_equations(debug(), equations, ns); #endif + // The object `dependencies` is also used by get, so we have to use it as a + // class member but we make sure it is cleared at each `dec_solve` call. + dependencies.clear(); debug() << "dec_solve: compute dependency graph and remove function " << "applications captured by the dependencies:" << eom; - const auto new_equation_end = std::remove_if( - equations.begin(), equations.end(), [&](const equal_exprt &eq) { // NOLINT - return add_node(dependencies, eq, generator.array_pool); - }); - equations.erase(new_equation_end, equations.end()); + std::vector local_equations; + for(const equal_exprt &eq : equations) + { + if(!add_node(dependencies, eq, generator.array_pool)) + local_equations.push_back(eq); + } #ifdef DEBUG dependencies.output_dot(debug()); #endif debug() << "dec_solve: add constraints" << eom; - dependencies.add_constraints(generator); + dependencies.add_constraints(generator); #ifdef DEBUG output_equations(debug(), equations, ns); @@ -676,7 +680,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() } #endif - for(const auto &eq : equations) + for(const auto &eq : local_equations) { #ifdef DEBUG debug() << "dec_solve: set_to " << format(eq) << eom; diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index f63ad2b9b0b..47666b93c70 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -259,6 +259,14 @@ void string_dependenciest::add_dependency( }); } +void string_dependenciest::clear() +{ + builtin_function_nodes.clear(); + string_nodes.clear(); + node_index_pool.clear(); + clean_cache(); +} + static void add_dependency_to_string_subexprs( string_dependenciest &dependencies, const function_application_exprt &fun_app, diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h index fda1d08ac0b..8ec325f9efe 100644 --- a/src/solvers/refinement/string_refinement_util.h +++ b/src/solvers/refinement/string_refinement_util.h @@ -254,6 +254,9 @@ class string_dependenciest /// only add constraints on the length. void add_constraints(string_constraint_generatort &generatort); + /// Clear the content of the dependency graph + void clear(); + private: /// Set of nodes representing builtin_functions std::vector builtin_function_nodes; From 5b4d61810e2534a76709e6c6f603daaac8dd7b49 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 5 Apr 2018 16:49:59 +0100 Subject: [PATCH 04/25] Reduce number of constraints in format We reduce the number of constraints by avoiding the copy of the last string. --- .../string_constraint_generator_format.cpp | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_format.cpp b/src/solvers/refinement/string_constraint_generator_format.cpp index 03a6f788d28..847d7baec4d 100644 --- a/src/solvers/refinement/string_constraint_generator_format.cpp +++ b/src/solvers/refinement/string_constraint_generator_format.cpp @@ -358,6 +358,7 @@ exprt string_constraint_generatort::add_axioms_for_format( const typet &index_type = res.length().type(); for(const format_elementt &fe : format_strings) + { if(fe.is_format_specifier()) { const format_specifiert &fs=fe.get_format_specifier(); @@ -392,24 +393,37 @@ exprt string_constraint_generatort::add_axioms_for_format( add_axioms_for_constant(str, fe.get_format_text().get_content()); intermediary_strings.push_back(str); } + } + + exprt return_code = from_integer(0, get_return_code_type()); if(intermediary_strings.empty()) - return to_array_string_expr( - array_exprt(array_typet(char_type, from_integer(0, index_type)))); + { + lemmas.push_back(equal_exprt(res.length(), from_integer(0, index_type))); + return return_code; + } - auto it=intermediary_strings.begin(); - array_string_exprt str = *(it++); - exprt return_code = from_integer(0, signedbv_typet(32)); - for(; it!=intermediary_strings.end(); ++it) + array_string_exprt str = intermediary_strings[0]; + + if(intermediary_strings.size() == 1) { + // Copy the first string + return add_axioms_for_substring( + res, str, from_integer(0, index_type), str.length()); + } + + // start after the first string and stop before the last + for(std::size_t i = 1; i < intermediary_strings.size() - 1; ++i) + { + const array_string_exprt &intermediary = intermediary_strings[i]; const array_string_exprt fresh = fresh_string(index_type, char_type); return_code = - bitor_exprt(return_code, add_axioms_for_concat(fresh, str, *it)); + bitor_exprt(return_code, add_axioms_for_concat(fresh, str, intermediary)); str = fresh; } - // Copy - add_axioms_for_substring(res, str, from_integer(0, index_type), str.length()); - return return_code; + + return bitor_exprt( + return_code, add_axioms_for_concat(res, str, intermediary_strings.back())); } /// Construct a string from a constant array. From 5f07bf025b32c90da5e7f5505014d8a928ac3c23 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 09:35:00 +0100 Subject: [PATCH 05/25] Initialization of sparse array from array-list --- .../refinement/string_refinement_util.cpp | 18 ++++++++++++++++++ .../refinement/string_refinement_util.h | 12 ++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index 47666b93c70..4e12ebb7ac5 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -158,6 +158,24 @@ interval_sparse_arrayt::interval_sparse_arrayt( } } +interval_sparse_arrayt interval_sparse_arrayt::of_array_list( + const exprt &expr, + const exprt &extra_value) +{ + PRECONDITION(expr.operands().size() % 2 == 0); + interval_sparse_arrayt sparse_array(extra_value); + for(std::size_t i = 0; i < expr.operands().size(); i += 2) + { + const auto index = numeric_cast(expr.operands()[i]); + INVARIANT( + expr.operands()[i + 1].type() == extra_value.type(), + "all values in array should have the same type"); + if(index.has_value() && expr.operands()[i + 1].id() != ID_unknown) + sparse_array.entries[*index] = expr.operands()[i + 1]; + } + return sparse_array; +} + void equation_symbol_mappingt::add(const std::size_t i, const exprt &expr) { equations_containing[expr].push_back(i); diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h index 8ec325f9efe..b05671ddf3e 100644 --- a/src/solvers/refinement/string_refinement_util.h +++ b/src/solvers/refinement/string_refinement_util.h @@ -124,7 +124,19 @@ class interval_sparse_arrayt final : public sparse_arrayt /// `extra_value`. interval_sparse_arrayt(const array_exprt &expr, const exprt &extra_value); + /// Initialize a sparse array from an array represented by a list of + /// index-value pairs, and setting the default to `extra_value`. + /// Indexes must be constant expressions, and negative indexes are ignored. + static interval_sparse_arrayt + of_array_list(const exprt &expr, const exprt &extra_value); + exprt to_if_expression(const exprt &index) const override; + +private: + explicit interval_sparse_arrayt(exprt default_value) + : sparse_arrayt(default_value) + { + } }; /// Maps equation to expressions contained in them and conversely expressions to From 848dd959dc5c3d64a887a8818a09d4b0f25145e6 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 09:35:22 +0100 Subject: [PATCH 06/25] Add an interval_sparse_array::of_expr function This initialize a sparse array from the array expression we know how to deal with. --- src/solvers/refinement/string_refinement_util.cpp | 12 ++++++++++++ src/solvers/refinement/string_refinement_util.h | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index 4e12ebb7ac5..662ea9ec11d 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -176,6 +176,18 @@ interval_sparse_arrayt interval_sparse_arrayt::of_array_list( return sparse_array; } +optionalt +interval_sparse_arrayt::of_expr(const exprt &expr, const exprt &extra_value) +{ + if(const auto &array_expr = expr_try_dynamic_cast(expr)) + return interval_sparse_arrayt(*array_expr, extra_value); + if(const auto &with_expr = expr_try_dynamic_cast(expr)) + return interval_sparse_arrayt(*with_expr); + if(expr.id() == "array-list") + return of_array_list(expr, extra_value); + return {}; +} + void equation_symbol_mappingt::add(const std::size_t i, const exprt &expr) { equations_containing[expr].push_back(i); diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h index b05671ddf3e..ef16e6ec486 100644 --- a/src/solvers/refinement/string_refinement_util.h +++ b/src/solvers/refinement/string_refinement_util.h @@ -132,6 +132,11 @@ class interval_sparse_arrayt final : public sparse_arrayt exprt to_if_expression(const exprt &index) const override; + /// If the expression is an array_exprt or a with_exprt uses the appropriate + /// constructor, otherwise returns empty optional. + static optionalt + of_expr(const exprt &expr, const exprt &extra_value); + private: explicit interval_sparse_arrayt(exprt default_value) : sparse_arrayt(default_value) From 0d0359136b93e4ad6e5ba95350045eb4237ada3c Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 4 Apr 2018 13:12:04 +0100 Subject: [PATCH 07/25] Add an `at` function for access in sparse arrays --- src/solvers/refinement/string_refinement_util.cpp | 13 +++++++++++++ src/solvers/refinement/string_refinement_util.h | 8 ++++++++ 2 files changed, 21 insertions(+) diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index 662ea9ec11d..b5bd656fd7d 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -123,6 +123,12 @@ exprt sparse_arrayt::to_if_expression(const exprt &index) const }); } +exprt sparse_arrayt::at(const std::size_t index) const +{ + const auto it = entries.find(index); + return it != entries.end() ? it->second : default_value; +} + exprt interval_sparse_arrayt::to_if_expression(const exprt &index) const { return std::accumulate( @@ -188,6 +194,13 @@ interval_sparse_arrayt::of_expr(const exprt &expr, const exprt &extra_value) return {}; } +exprt interval_sparse_arrayt::at(const std::size_t index) const +{ + // First element at or after index + const auto it = entries.lower_bound(index); + return it != entries.end() ? it->second : default_value; +} + void equation_symbol_mappingt::add(const std::size_t i, const exprt &expr) { equations_containing[expr].push_back(i); diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h index ef16e6ec486..f3afff710ad 100644 --- a/src/solvers/refinement/string_refinement_util.h +++ b/src/solvers/refinement/string_refinement_util.h @@ -96,6 +96,10 @@ class sparse_arrayt /// at the given index virtual exprt to_if_expression(const exprt &index) const; + /// Get the value at the specified index. + /// Complexity is linear in the number of entries. + virtual exprt at(std::size_t index) const; + protected: exprt default_value; std::map entries; @@ -137,6 +141,10 @@ class interval_sparse_arrayt final : public sparse_arrayt static optionalt of_expr(const exprt &expr, const exprt &extra_value); + /// Get the value at the specified index. + /// Complexity is linear in the number of entries. + exprt at(std::size_t index) const override; + private: explicit interval_sparse_arrayt(exprt default_value) : sparse_arrayt(default_value) From c52c813fb7a52b38b9d8ab3c986761d967d2e3f9 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 4 Apr 2018 13:11:21 +0100 Subject: [PATCH 08/25] Add a sum_overflows function Detects when an overflow happens in the sum of two integers. This will be used in buitin functions for dealing with the overflow case. --- .../refinement/string_constraint_generator.h | 3 ++ .../string_constraint_generator_main.cpp | 31 +++++++++++-------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 7fcd2d76574..00f9d4da6c2 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -447,6 +447,9 @@ exprt minimum(const exprt &a, const exprt &b); /// \return expression representing the maximum of two expressions exprt maximum(const exprt &a, const exprt &b); +/// \return Boolean true when the sum of the two expressions overflows +exprt sum_overflows(const plus_exprt &sum); + exprt length_constraint_for_concat_char( const array_string_exprt &res, const array_string_exprt &s1); diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 97c0287eb96..2e88bfdfe14 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -130,30 +130,35 @@ symbol_exprt string_constraint_generatort::fresh_boolean( return b; } +exprt sum_overflows(const plus_exprt &sum) +{ + PRECONDITION(sum.operands().size() == 2); + const exprt zero = from_integer(0, sum.op0().type()); + const binary_relation_exprt op0_negative(sum.op0(), ID_lt, zero); + const binary_relation_exprt op1_negative(sum.op1(), ID_lt, zero); + const binary_relation_exprt sum_negative(sum, ID_lt, zero); + + // overflow happens when we add two values of same sign but their sum has a + // different sign + return and_exprt( + equal_exprt(op0_negative, op1_negative), + notequal_exprt(op1_negative, sum_negative)); +} + /// Create a plus expression while adding extra constraints to axioms in order /// to prevent overflows. /// \param op1: First term of the sum /// \param op2: Second term of the sum /// \return A plus expression representing the sum of the arguments +/// \deprecated plus_exprt string_constraint_generatort::plus_exprt_with_overflow_check( const exprt &op1, const exprt &op2) { - plus_exprt sum(plus_exprt(op1, op2)); - - exprt zero=from_integer(0, op1.type()); - - binary_relation_exprt neg1(op1, ID_lt, zero); - binary_relation_exprt neg2(op2, ID_lt, zero); - binary_relation_exprt neg_sum(sum, ID_lt, zero); - + const plus_exprt sum(plus_exprt(op1, op2)); // We prevent overflows by adding the following constraint: // If the signs of the two operands are the same, then the sign of the sum // should also be the same. - implies_exprt no_overflow(equal_exprt(neg1, neg2), - equal_exprt(neg1, neg_sum)); - - lemmas.push_back(no_overflow); - + lemmas.push_back(not_exprt(sum_overflows(sum))); return sum; } From c40b836053e751ac736a1e509aeb718d5610fecf Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 10:59:53 +0100 Subject: [PATCH 09/25] Truncate string concatenations in case of overflow --- .../string_constraint_generator_concat.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index d1cd743b32d..3a8cdfca8cc 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -18,12 +18,15 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// at index `end_index'`. /// Where start_index' is max(0, start_index) and end_index' is /// max(min(end_index, s2.length), start_index') +/// If s1.length + end_index' - start_index' is greater than the maximal integer +/// of the type of res.length, then the result gets truncated to the size +/// of this maximal integer. /// /// These axioms are: -/// 1. \f$|res| = |s_1| + end\_index' - start\_index' \f$ +/// 1. \f$|res| = overflow ? |s_1| + end\_index' - start\_index' +/// : max_int \f$ /// 2. \f$\forall i<|s_1|. res[i]=s_1[i] \f$ -/// 3. \f$\forall i< end\_index' - start\_index'.\ res[i+|s_1|] -/// = s_2[start\_index'+i]\f$ +/// 3. \f$\forall i< |res| - |s_1|.\ res[i+|s_1|] = s_2[start\_index'+i]\f$ /// /// \param res: an array of characters expression /// \param s1: an array of characters expression @@ -59,7 +62,8 @@ exprt string_constraint_generatort::add_axioms_for_concat_substr( fresh_univ_index("QA_index_concat2", res.length().type()); const equal_exprt res_eq( res[plus_exprt(idx2, s1.length())], s2[plus_exprt(start1, idx2)]); - return string_constraintt(idx2, minus_exprt(end1, start1), res_eq); + const minus_exprt upper_bound(res.length(), s1.length()); + return string_constraintt(idx2, upper_bound, res_eq); }()); return from_integer(0, get_return_code_type()); @@ -77,10 +81,13 @@ exprt length_constraint_for_concat_substr( const exprt &start, const exprt &end) { + PRECONDITION(res.length().type().id() == ID_signedbv); const exprt start1 = maximum(start, from_integer(0, start.type())); const exprt end1 = maximum(minimum(end, s2.length()), start1); const plus_exprt res_length(s1.length(), minus_exprt(end1, start1)); - return equal_exprt(res.length(), res_length); + const exprt overflow = sum_overflows(res_length); + const exprt max_int = to_signedbv_type(res.length().type()).largest_expr(); + return equal_exprt(res.length(), if_exprt(overflow, max_int, res_length)); } /// Add axioms enforcing that the length of `res` is that of the concatenation From 4779b257fbddbdff2045adfac06138bd9c6eab08 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 11:09:32 +0100 Subject: [PATCH 10/25] Get rid of calls to plus_exprt_with_overflow This function is adding assumptions on the values of integers which may lead to contradiction. It is better to deal with overflows at the level of the specification of the builtin functions instead. --- ...tring_constraint_generator_code_points.cpp | 9 ++-- .../string_constraint_generator_indexof.cpp | 3 +- .../string_constraint_generator_testing.cpp | 9 ++-- ...ng_constraint_generator_transformation.cpp | 49 +++++++++---------- 4 files changed, 31 insertions(+), 39 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp index 1f61a769208..97a94e1a417 100644 --- a/src/solvers/refinement/string_constraint_generator_code_points.cpp +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -128,12 +128,11 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at( const symbol_exprt result = fresh_symbol("char", return_type); const exprt index1 = from_integer(1, str.length().type()); const exprt &char1=str[pos]; - const exprt &char2=str[plus_exprt_with_overflow_check(pos, index1)]; + const exprt &char2 = str[plus_exprt(pos, index1)]; const typecast_exprt char1_as_int(char1, return_type); const typecast_exprt char2_as_int(char2, return_type); const exprt pair = pair_value(char1_as_int, char2_as_int, return_type); - const exprt is_low = - is_low_surrogate(str[plus_exprt_with_overflow_check(pos, index1)]); + const exprt is_low = is_low_surrogate(str[plus_exprt(pos, index1)]); const and_exprt return_pair(is_high_surrogate(str[pos]), is_low); lemmas.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); @@ -210,8 +209,8 @@ exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( const typet &return_type=f.type(); const symbol_exprt result = fresh_symbol("offset_by_code_point", return_type); - const exprt minimum = plus_exprt_with_overflow_check(index, offset); - const exprt maximum = plus_exprt_with_overflow_check(minimum, offset); + const exprt minimum = plus_exprt(index, offset); + const exprt maximum = plus_exprt(minimum, offset); lemmas.push_back(binary_relation_exprt(result, ID_le, maximum)); lemmas.push_back(binary_relation_exprt(result, ID_ge, minimum)); diff --git a/src/solvers/refinement/string_constraint_generator_indexof.cpp b/src/solvers/refinement/string_constraint_generator_indexof.cpp index e2336e598a9..6ac23094b05 100644 --- a/src/solvers/refinement/string_constraint_generator_indexof.cpp +++ b/src/solvers/refinement/string_constraint_generator_indexof.cpp @@ -352,8 +352,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( lemmas.push_back(a3); const exprt index1 = from_integer(1, index_type); - const exprt from_index_plus_one = - plus_exprt_with_overflow_check(from_index, index1); + const plus_exprt from_index_plus_one(from_index, index1); const if_exprt end_index( binary_relation_exprt(from_index_plus_one, ID_le, str.length()), from_index_plus_one, diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index 09948073b49..81dd16272aa 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -36,11 +36,10 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( symbol_exprt isprefix=fresh_boolean("isprefix"); const typet &index_type=str.length().type(); - implies_exprt a1( - isprefix, - str.axiom_for_length_ge(plus_exprt_with_overflow_check( - prefix.length(), offset))); - lemmas.push_back(a1); + // Axiom 1. + lemmas.push_back( + implies_exprt( + isprefix, str.axiom_for_length_ge(plus_exprt(prefix.length(), offset)))); symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); string_constraintt a2( diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index a5b01005c7b..154e027a5ef 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -175,9 +175,8 @@ exprt string_constraint_generatort::add_axioms_for_trim( const symbol_exprt idx = fresh_exist_index("index_trim", index_type); const exprt space_char = from_integer(' ', char_type); - exprt a1=str.axiom_for_length_ge( - plus_exprt_with_overflow_check(idx, res.length())); - lemmas.push_back(a1); + // Axiom 1. + lemmas.push_back(str.axiom_for_length_ge(plus_exprt(idx, res.length()))); binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type)); lemmas.push_back(a2); @@ -197,32 +196,31 @@ exprt string_constraint_generatort::add_axioms_for_trim( string_constraintt a6(n, idx, non_print); constraints.push_back(a6); - symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type); - minus_exprt bound(str.length(), plus_exprt_with_overflow_check(idx, - res.length())); - binary_relation_exprt eqn2( - str[plus_exprt(idx, plus_exprt(res.length(), n2))], - ID_le, - space_char); - - string_constraintt a7(n2, bound, eqn2); - constraints.push_back(a7); + // Axiom 7. + constraints.push_back([&] { // NOLINT + const symbol_exprt n2 = fresh_univ_index("QA_index_trim2", index_type); + const minus_exprt bound(minus_exprt(str.length(), idx), res.length()); + const binary_relation_exprt eqn2( + str[plus_exprt(idx, plus_exprt(res.length(), n2))], ID_le, space_char); + return string_constraintt(n2, bound, eqn2); + }()); symbol_exprt n3=fresh_univ_index("QA_index_trim3", index_type); equal_exprt eqn3(res[n3], str[plus_exprt(n3, idx)]); string_constraintt a8(n3, res.length(), eqn3); constraints.push_back(a8); - minus_exprt index_before( - plus_exprt_with_overflow_check(idx, res.length()), - from_integer(1, index_type)); - binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char); - or_exprt a9( - equal_exprt(idx, str.length()), - and_exprt( - binary_relation_exprt(str[idx], ID_gt, space_char), - no_space_before)); - lemmas.push_back(a9); + // Axiom 9. + lemmas.push_back([&] { + const plus_exprt index_before( + idx, minus_exprt(res.length(), from_integer(1, index_type))); + const binary_relation_exprt no_space_before( + str[index_before], ID_gt, space_char); + return or_exprt( + equal_exprt(idx, str.length()), + and_exprt( + binary_relation_exprt(str[idx], ID_gt, space_char), no_space_before)); + }()); return from_integer(0, f.type()); } @@ -511,10 +509,7 @@ exprt string_constraint_generatort::add_axioms_for_delete_char_at( const array_string_exprt str = get_string_expr(f.arguments()[2]); exprt index_one=from_integer(1, str.length().type()); return add_axioms_for_delete( - res, - str, - f.arguments()[3], - plus_exprt_with_overflow_check(f.arguments()[3], index_one)); + res, str, f.arguments()[3], plus_exprt(f.arguments()[3], index_one)); } /// Add axioms stating that `res` corresponds to the input `str` From ce4c008fda4cc8ab35aacec65157055aef48ec15 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 15:01:27 +0100 Subject: [PATCH 11/25] Remove plus_exprt_with_overflow_check --- .../refinement/string_constraint_generator.h | 2 -- .../string_constraint_generator_main.cpp | 17 ----------------- 2 files changed, 19 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 00f9d4da6c2..bb77486bd2f 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -177,8 +177,6 @@ class string_constraint_generatort final array_string_exprt fresh_string(const typet &index_type, const typet &char_type); array_string_exprt get_string_expr(const exprt &expr); - plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2); - static constant_exprt constant_char(int i, const typet &char_type); diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 2e88bfdfe14..bf9c36ff0dd 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -145,23 +145,6 @@ exprt sum_overflows(const plus_exprt &sum) notequal_exprt(op1_negative, sum_negative)); } -/// Create a plus expression while adding extra constraints to axioms in order -/// to prevent overflows. -/// \param op1: First term of the sum -/// \param op2: Second term of the sum -/// \return A plus expression representing the sum of the arguments -/// \deprecated -plus_exprt string_constraint_generatort::plus_exprt_with_overflow_check( - const exprt &op1, const exprt &op2) -{ - const plus_exprt sum(plus_exprt(op1, op2)); - // We prevent overflows by adding the following constraint: - // If the signs of the two operands are the same, then the sign of the sum - // should also be the same. - lemmas.push_back(not_exprt(sum_overflows(sum))); - return sum; -} - /// Associate an actual finite length to infinite arrays /// \param s: array expression representing a string /// \return expression for the length of `s` From cb105500481b5db07b49049b5454d5aa062d1933 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 15:02:29 +0100 Subject: [PATCH 12/25] Use sparse arrays in substitute_array_access The obtained expression can be exponentially smaller, because sparse array representation avoids repetitions. --- src/solvers/refinement/string_refinement.cpp | 28 ++++---------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index fed05e00478..27f6db950e0 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -1188,6 +1188,8 @@ exprt fill_in_array_expr(const array_exprt &expr, std::size_t string_max_length) /// expressions: for instance in array access `arr[index]`, where: /// `arr := {12, 24, 48}` the constructed expression will be: /// `index==0 ? 12 : index==1 ? 24 : 48` +/// Avoids repetition so `arr := {12, 12, 24, 48}` will give +/// `index<=1 ? 12 : index==1 ? 24 : 48` static exprt substitute_array_access( const array_exprt &array_expr, const exprt &index, @@ -1195,29 +1197,9 @@ static exprt substitute_array_access( &symbol_generator) { const typet &char_type = array_expr.type().subtype(); - const typet &index_type = to_array_type(array_expr.type()).size().type(); - const std::vector &operands = array_expr.operands(); - - exprt result = symbol_generator("out_of_bound_access", char_type); - - for(std::size_t i = 0; i < operands.size(); ++i) - { - // Go in reverse order so that smaller indexes appear first in the result - const std::size_t pos = operands.size() - 1 - i; - const equal_exprt equals(index, from_integer(pos, index_type)); - if(operands[pos].type() != char_type) - { - INVARIANT( - operands[pos].id() == ID_unknown, - string_refinement_invariantt( - "elements can only have type char or " - "unknown, and it is not char type")); - result = if_exprt(equals, exprt(ID_unknown, char_type), result); - } - else - result = if_exprt(equals, operands[pos], result); - } - return result; + const exprt default_val = symbol_generator("out_of_bound_access", char_type); + const interval_sparse_arrayt sparse_array(array_expr, default_val); + return sparse_array.to_if_expression(index); } static exprt substitute_array_access( From dfc584ada84093fc6422e53d4a15f19c80c38b26 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 15:05:21 +0100 Subject: [PATCH 13/25] Add concretize function for interval_sparse_array --- .../refinement/string_refinement_util.cpp | 18 ++++++++++++++++++ .../refinement/string_refinement_util.h | 3 +++ 2 files changed, 21 insertions(+) diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index b5bd656fd7d..6714a49af4f 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -201,6 +201,24 @@ exprt interval_sparse_arrayt::at(const std::size_t index) const return it != entries.end() ? it->second : default_value; } +array_exprt interval_sparse_arrayt::concretize( + std::size_t size, + const typet &index_type) const +{ + const array_typet array_type( + default_value.type(), from_integer(size, index_type)); + array_exprt array(array_type); + + std::size_t current_index = 0; + for(const auto &pair : entries) + { + for(; current_index <= pair.first && current_index < size; ++current_index) + array.operands().push_back(pair.second); + } + for(; current_index < size; ++current_index) + array.operands().push_back(default_value); + return array; +} void equation_symbol_mappingt::add(const std::size_t i, const exprt &expr) { equations_containing[expr].push_back(i); diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h index f3afff710ad..d1ed9ae0710 100644 --- a/src/solvers/refinement/string_refinement_util.h +++ b/src/solvers/refinement/string_refinement_util.h @@ -141,6 +141,9 @@ class interval_sparse_arrayt final : public sparse_arrayt static optionalt of_expr(const exprt &expr, const exprt &extra_value); + /// Convert to an array representation, ignores elements at index >= size + array_exprt concretize(std::size_t size, const typet &index_type) const; + /// Get the value at the specified index. /// Complexity is linear in the number of entries. exprt at(std::size_t index) const override; From 037f631b955c3e3afe9cd1ef32bc2d816769b7c1 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 4 Apr 2018 10:35:17 +0100 Subject: [PATCH 14/25] Refactor string_refinementt::get --- src/solvers/refinement/string_refinement.cpp | 42 +++++++------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 27f6db950e0..885a3055afc 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -2228,13 +2228,9 @@ exprt substitute_array_lists(exprt expr, size_t string_max_length) /// \return evaluated expression exprt string_refinementt::get(const exprt &expr) const { - // clang-format off - const auto super_get = [this](const exprt &expr) - { + const auto super_get = [this](const exprt &expr) { return supert::get(expr); }; - // clang-format on - exprt ecopy(expr); (void)symbol_resolve.replace_expr(ecopy); @@ -2248,35 +2244,25 @@ exprt string_refinementt::get(const exprt &expr) const dependencies.eval(arr, [&](const exprt &expr) { return get(expr); })) return *from_dependencies; - const auto arr_model_opt = - get_array(super_get, ns, generator.max_string_length, debug(), arr); - // \todo Refactor with get array in model - if(arr_model_opt) + if( + const auto arr_model_opt = + get_array(super_get, ns, generator.max_string_length, debug(), arr)) { - const exprt arr_model = simplify_expr(*arr_model_opt, ns); - const exprt concretized_array = concretize_arrays_in_expression( - arr_model, generator.max_string_length, ns); - return concretized_array; + // \todo get_array should take care of the concretization + return concretize_arrays_in_expression( + *arr_model_opt, generator.max_string_length, ns); } - else + + if(generator.get_created_strings().count(arr)) { - auto set = generator.get_created_strings(); - if(set.find(arr) != set.end()) + const exprt length = super_get(arr.length()); + if(const auto n = numeric_cast(length)) { - exprt length = super_get(arr.length()); - if(const auto n = numeric_cast(length)) - { - exprt arr_model = - array_exprt(array_typet(arr.type().subtype(), length)); - for(size_t i = 0; i < *n; i++) - arr_model.copy_to_operands(exprt(ID_unknown, arr.type().subtype())); - const exprt concretized_array = concretize_arrays_in_expression( - arr_model, generator.max_string_length, ns); - return concretized_array; - } + const array_exprt arr_model(array_typet(arr.type().subtype(), length)); + return fill_in_array_expr(arr_model, generator.max_string_length); } - return arr; } + return arr; } return supert::get(ecopy); } From 9b286d9a3d20ff9f570c2ff2c7ec609a263bd735 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 4 Apr 2018 10:36:46 +0100 Subject: [PATCH 15/25] Use sparse arrays in string_refinement::get In the case of index_expressions this can use exponentialy less memory. --- src/solvers/refinement/string_refinement.cpp | 39 +++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 885a3055afc..99477f1adbb 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -2222,8 +2222,8 @@ exprt substitute_array_lists(exprt expr, size_t string_max_length) /// Evaluates the given expression in the valuation found by /// string_refinementt::dec_solve. /// -/// The difference with supert::get is that arrays of characters need to be -/// concretized. See concretize_arrays_in_expression for how it is done. +/// Arrays of characters are interpreted differently from the result of +/// supert::get: values are propagated to the left to fill unknown. /// \param expr: an expression /// \return evaluated expression exprt string_refinementt::get(const exprt &expr) const @@ -2234,6 +2234,41 @@ exprt string_refinementt::get(const exprt &expr) const exprt ecopy(expr); (void)symbol_resolve.replace_expr(ecopy); + // Special treatment for index expressions + const auto &index_expr = expr_try_dynamic_cast(ecopy); + if(index_expr && is_char_type(index_expr->type())) + { + std::reference_wrapper current(index_expr->array()); + while(current.get().id() == ID_if) + { + const auto &if_expr = expr_dynamic_cast(current.get()); + const exprt cond = get(if_expr.cond()); + if(cond.is_true()) + current = std::cref(if_expr.true_case()); + else if(cond.is_false()) + current = std::cref(if_expr.false_case()); + else + UNREACHABLE; + } + const auto array = supert::get(current.get()); + const auto index = get(index_expr->index()); + const exprt unknown = + from_integer(CHARACTER_FOR_UNKNOWN, index_expr->type()); + if( + const auto sparse_array = interval_sparse_arrayt::of_expr(array, unknown)) + { + if(const auto index_value = numeric_cast(index)) + return sparse_array->at(*index_value); + return sparse_array->to_if_expression(index); + } + + INVARIANT( + array.id() == ID_symbol, + "apart from symbols, array valuations can be interpreted as sparse " + "arrays"); + return index_exprt(array, index); + } + if(is_char_array_type(ecopy.type(), ns)) { array_string_exprt &arr = to_array_string_expr(ecopy); From beff419a34661fb9531262aacda5891b06a22c98 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 15:10:04 +0100 Subject: [PATCH 16/25] Use sparse arrays in get_array This ensures that arrays from the underlying solver are interpreted in a consistent manner in the solver (always using interval_sparse_arrayt). --- src/solvers/refinement/string_refinement.cpp | 55 +++++--------------- src/solvers/refinement/string_refinement.h | 2 + 2 files changed, 15 insertions(+), 42 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 99477f1adbb..bc5b1c48019 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -935,50 +935,21 @@ static optionalt get_array( } std::size_t n = *n_opt; - const array_typet ret_type(char_type, from_integer(n, index_type)); - array_exprt ret(ret_type); - - if(n>max_string_length) + if(n > MAX_CONCRETE_STRING_SIZE) { stream << "(sr::get_array) long string (size=" << n << ")" << eom; - return {}; - } - - if(n==0) - return empty_ret; - - if(arr_val.id()=="array-list") - { - DATA_INVARIANT( - arr_val.operands().size()%2==0, - string_refinement_invariantt("and index expression must be on a symbol, " - "with, array_of, if, or array, and all " - "cases besides array are handled above")); - std::map initial_map; - for(size_t i = 0; i < arr_val.operands().size(); i += 2) - { - exprt index = arr_val.operands()[i]; - if(auto idx = numeric_cast(index)) - { - if(*idx < n) - initial_map[*idx] = arr_val.operands()[i + 1]; - } - } - - // Pad the concretized values to the left to assign the uninitialized - // values of result. - ret.operands()=fill_in_map_as_vector(initial_map); - return ret; - } - else if(arr_val.id()==ID_array) - { - // copy the `n` first elements of `arr_val` - for(size_t i=0; iconcretize(n, index_type); + return {}; } /// convert the content of a string to a more readable representation. This diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index 281f004da87..9289b3a78f4 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -31,6 +31,8 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #define DEFAULT_MAX_NB_REFINEMENT std::numeric_limits::max() #define CHARACTER_FOR_UNKNOWN '?' +// Limit the size of strings in traces to 64M chars to avoid memout +#define MAX_CONCRETE_STRING_SIZE (1 << 26) class string_refinementt final: public bv_refinementt { From c58ac60bbdb346ab7d8849dd673fa3a76f643b7c Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 15:11:51 +0100 Subject: [PATCH 17/25] Avoid copy in ranged for loops over expressions --- src/solvers/refinement/string_refinement.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index bc5b1c48019..1b5241187c3 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -482,7 +482,7 @@ union_find_replacet string_identifiers_resolution_from_equations( equations_to_treat.push(i); std::vector rhs_strings = extract_strings(eq.rhs()); - for(const auto expr : rhs_strings) + for(const auto &expr : rhs_strings) equation_map.add(i, expr); } else if(eq.lhs().type().id() != ID_pointer && @@ -490,7 +490,7 @@ union_find_replacet string_identifiers_resolution_from_equations( { std::vector lhs_strings = extract_strings_from_lhs(eq.lhs()); - for(const auto expr : lhs_strings) + for(const auto &expr : lhs_strings) equation_map.add(i, expr); if(lhs_strings.empty()) From 8ed138b24ccbace045e7b68e027197f26bc80e0d Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 15:12:30 +0100 Subject: [PATCH 18/25] Remove unused header --- src/solvers/refinement/string_refinement.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 1b5241187c3..0419ea31004 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -23,7 +23,6 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include #include -#include #include #include #include From 052d503ea5b3f5bfc3fb70552156483c2b1bac09 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 15:41:36 +0100 Subject: [PATCH 19/25] Use get_array in get_char_array_and_concretize This makes sure the way we interpret arrays is consistent even in debugging functions. --- src/solvers/refinement/string_refinement.cpp | 43 ++++++++++---------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 0419ea31004..4efbfebc6a6 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -989,34 +989,35 @@ static exprt get_char_array_and_concretize( if(arr_model_opt) { stream << indent << indent << "- char_array: " << format(*arr_model_opt) + << '\n'; + stream << indent << indent << "- type : " << format(arr_model_opt->type()) << eom; const exprt simple = simplify_expr(*arr_model_opt, ns); stream << indent << indent << "- simplified_char_array: " << format(simple) << eom; - const exprt concretized_array = - concretize_arrays_in_expression(simple, max_string_length, ns); - stream << indent << indent - << "- concretized_char_array: " << format(concretized_array) << eom; - - if(concretized_array.id() == ID_array) - { - stream << indent << indent << "- as_string: \"" - << string_of_array(to_array_expr(concretized_array)) << "\"\n"; - } - else + if( + const auto concretized_array = get_array( + super_get, ns, max_string_length, stream, to_array_string_expr(simple))) { - stream << indent << "- warning: not an array" << eom; - } + stream << indent << indent + << "- concretized_char_array: " << format(*concretized_array) + << eom; - stream << indent << indent << "- type: " << format(concretized_array.type()) - << eom; - return concretized_array; - } - else - { - stream << indent << indent << "- incomplete model" << eom; - return arr; + if( + const auto array_expr = + expr_try_dynamic_cast(*concretized_array)) + { + stream << indent << indent << "- as_string: \"" + << string_of_array(*array_expr) << "\"\n"; + } + else + stream << indent << "- warning: not an array" << eom; + return *concretized_array; + } + return simple; } + stream << indent << indent << "- incomplete model" << eom; + return arr; } /// Display part of the current model by mapping the variables created by the From 6d87233b14b769f67d3d4ff510ee689e8cbdbc22 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 4 Apr 2018 11:41:42 +0100 Subject: [PATCH 20/25] Use concretize instead of fill_in_array This will allow to remove fill_in_array_expr which duplicates what concretize does. --- src/solvers/refinement/string_refinement.cpp | 5 +++-- src/solvers/refinement/string_refinement_util.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 4efbfebc6a6..25ab5d7cd1f 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -2264,8 +2264,9 @@ exprt string_refinementt::get(const exprt &expr) const const exprt length = super_get(arr.length()); if(const auto n = numeric_cast(length)) { - const array_exprt arr_model(array_typet(arr.type().subtype(), length)); - return fill_in_array_expr(arr_model, generator.max_string_length); + const interval_sparse_arrayt sparse_array( + from_integer(CHARACTER_FOR_UNKNOWN, arr.type().subtype())); + return sparse_array.concretize(*n, length.type()); } } return arr; diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h index d1ed9ae0710..d05c5a2d88c 100644 --- a/src/solvers/refinement/string_refinement_util.h +++ b/src/solvers/refinement/string_refinement_util.h @@ -148,7 +148,7 @@ class interval_sparse_arrayt final : public sparse_arrayt /// Complexity is linear in the number of entries. exprt at(std::size_t index) const override; -private: + /// Array containing the same value at each index. explicit interval_sparse_arrayt(exprt default_value) : sparse_arrayt(default_value) { From a6c4010dacb02653222e4da806023bcbe6ac9fbd Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 15:42:56 +0100 Subject: [PATCH 21/25] Stop using concretize_arrays_in_expression This is now unecessary as get_array takes care of the concretization using sparse arrays. This allows use to delete concretize_arrays_in_expression which is now unused. Equivalent behavior to this function can be obtained using sparse arrays. --- src/solvers/refinement/string_refinement.cpp | 123 +------------------ src/solvers/refinement/string_refinement.h | 4 - 2 files changed, 2 insertions(+), 125 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 25ab5d7cd1f..3aa14f1ddf4 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -46,7 +46,7 @@ static optionalt find_counter_example( /// * the negation of `a` is an existential formula `b`; /// * we substituted symbols in `b` by their values found in `get`; /// * arrays are concretized, meaning we attribute a value for characters that -/// are unknown to get, for details see concretize_arrays_in_expression; +/// are unknown to get, for details see substitute_array_access; /// * `b` is simplified and array accesses are replaced by expressions /// without arrays; /// * we give lemma `b` to a fresh solver; @@ -1080,81 +1080,6 @@ static exprt substitute_array_access( : sparse_arrayt(expr).to_if_expression(index); } -/// Fill an array represented by a list of with_expr by propagating values to -/// the left. For instance `ARRAY_OF(12) WITH[2:=24] WITH[4:=42]` will give -/// `{ 24, 24, 24, 42, 42 }` -/// \param expr: an array expression in the form -/// `ARRAY_OF(x) WITH [i0:=v0] ... WITH [iN:=vN]` -/// \param string_max_length: bound on the length of strings -/// \return an array expression with filled in values, or expr if it is simply -/// an `ARRAY_OF(x)` expression -static array_exprt -fill_in_array_with_expr(const exprt &expr, const std::size_t string_max_length) -{ - PRECONDITION(expr.type().id()==ID_array); - PRECONDITION(expr.id()==ID_with || expr.id()==ID_array_of); - const array_typet &array_type = to_array_type(expr.type()); - - // Map of the parts of the array that are initialized - std::map initial_map; - - // Set the last index to be sure the array will have the right length - const auto &array_size_opt = numeric_cast(array_type.size()); - if(array_size_opt && *array_size_opt > 0) - initial_map.emplace( - *array_size_opt - 1, - from_integer(CHARACTER_FOR_UNKNOWN, array_type.subtype())); - - for(exprt it=expr; it.id()==ID_with; it=to_with_expr(it).old()) - { - // Add to `initial_map` all the pairs (index,value) contained in `WITH` - // statements - const with_exprt &with_expr = to_with_expr(it); - const exprt &then_expr=with_expr.new_value(); - const auto index = - numeric_cast_v(to_constant_expr(with_expr.where())); - if( - index < string_max_length && (!array_size_opt || index < *array_size_opt)) - initial_map.emplace(index, then_expr); - } - - array_exprt result(array_type); - result.operands() = fill_in_map_as_vector(initial_map); - return result; -} - -/// Fill an array represented by an array_expr by propagating values to -/// the left for unknown values. For instance `{ 24 , * , * , 42, * }` will give -/// `{ 24, 42, 42, 42, '?' }` -/// \param expr: an array expression -/// \param string_max_length: bound on the length of strings -/// \return an array expression with filled in values -exprt fill_in_array_expr(const array_exprt &expr, std::size_t string_max_length) -{ - PRECONDITION(expr.type().id() == ID_array); - const array_typet &array_type = to_array_type(expr.type()); - PRECONDITION(array_type.subtype().id() == ID_unsignedbv); - - // Map of the parts of the array that are initialized - std::map initial_map; - const auto &array_size_opt = numeric_cast(array_type.size()); - - if(array_size_opt && *array_size_opt > 0) - initial_map.emplace( - *array_size_opt - 1, - from_integer(CHARACTER_FOR_UNKNOWN, array_type.subtype())); - - for(std::size_t i = 0; i < expr.operands().size(); ++i) - { - if(i < string_max_length && expr.operands()[i].id() != ID_unknown) - initial_map[i] = expr.operands()[i]; - } - - array_exprt result(array_type); - result.operands()=fill_in_map_as_vector(initial_map); - return result; -} - /// Create an equivalent expression where array accesses are replaced by 'if' /// expressions: for instance in array access `arr[index]`, where: /// `arr := {12, 24, 48}` the constructed expression will be: @@ -1348,46 +1273,6 @@ static exprt negation_of_constraint(const string_constraintt &axiom) return negaxiom; } -/// Result of the solver `supert` should not be interpreted literally for char -/// arrays as not all indices are present in the index set. -/// In the given expression, we populate arrays at the indices for which the -/// solver has no constraint by copying values to the left. -/// For example an expression `ARRAY_OF(0) WITH [1:=2] WITH [4:=3]` would -/// be interpreted as `{ 2, 2, 3, 3, 3}`. -/// \param expr: expression to interpret -/// \param string_max_length: maximum size of arrays to consider -/// \param ns: namespace, used to determine what is an array of character -/// \return the interpreted expression -exprt concretize_arrays_in_expression( - exprt expr, - std::size_t string_max_length, - const namespacet &ns) -{ - auto it=expr.depth_begin(); - const auto end=expr.depth_end(); - while(it!=end) - { - if(is_char_array_type(it->type(), ns)) - { - if(it->id() == ID_with || it->id() == ID_array_of) - { - it.mutate() = fill_in_array_with_expr(*it, string_max_length); - it.next_sibling_or_parent(); - } - else if(it->id() == ID_array) - { - it.mutate() = fill_in_array_expr(to_array_expr(*it), string_max_length); - it.next_sibling_or_parent(); - } - else - ++it; // ignoring other expressions - } - else - ++it; - } - return expr; -} - /// Debugging function which outputs the different steps an axiom goes through /// to be checked in check axioms. static void debug_check_axioms_step( @@ -2253,11 +2138,7 @@ exprt string_refinementt::get(const exprt &expr) const if( const auto arr_model_opt = get_array(super_get, ns, generator.max_string_length, debug(), arr)) - { - // \todo get_array should take care of the concretization - return concretize_arrays_in_expression( - *arr_model_opt, generator.max_string_length, ns); - } + return *arr_model_opt; if(generator.get_created_strings().count(arr)) { diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index 9289b3a78f4..0fd5dbc8f5a 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -93,10 +93,6 @@ class string_refinementt final: public bv_refinementt }; exprt substitute_array_lists(exprt expr, std::size_t string_max_length); -exprt concretize_arrays_in_expression( - exprt expr, - std::size_t string_max_length, - const namespacet &ns); // Declaration required for unit-test: union_find_replacet string_identifiers_resolution_from_equations( From 1a087725720964bf3c21e8aa4a50f1089f008ccb Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Apr 2018 09:40:47 +0100 Subject: [PATCH 22/25] Tests where model involves long strings --- .../jbmc-strings/long_string/Test.class | Bin 0 -> 985 bytes regression/jbmc-strings/long_string/Test.java | 41 ++++++++++++++++++ regression/jbmc-strings/long_string/test.desc | 11 +++++ .../jbmc-strings/long_string/test_abort.desc | 9 ++++ 4 files changed, 61 insertions(+) create mode 100644 regression/jbmc-strings/long_string/Test.class create mode 100644 regression/jbmc-strings/long_string/Test.java create mode 100644 regression/jbmc-strings/long_string/test.desc create mode 100644 regression/jbmc-strings/long_string/test_abort.desc diff --git a/regression/jbmc-strings/long_string/Test.class b/regression/jbmc-strings/long_string/Test.class new file mode 100644 index 0000000000000000000000000000000000000000..de6b488b03b1fb4093bf83ec7dc0a8eec3ef3a01 GIT binary patch literal 985 zcmaiy&rcIU6vw~Q-FCOztt~~dfK+~z(w0I&dXp%mYC-~PQeq;v?RL^G)Lk;WMwDVClwsCBQ!8B&{$%OCe%G`ZN8!3_i-{yX>D4UoVLWUAe zlre9jf&~qWCaPF6kwTgwJ3v{#nD3zwo}fWw6%ETK?qG!>J&1hL+~qd)|L5pz4idxm zT-&0%!WC1NE{{nWUGBDnPAK07!>A|PReN7}2V7L^PeVH>-m%0w6(6~x_(rKtst+LR zU3xGIZE@cbyw#tMG!1%mL9ew#k8je?09ZOJ^&n3LlbX>wOuGrPjBK=g0(K+=ah=v! zc&N}%xq%V#W#}73ppacFpF*4e3H7Kve~idhL2egfmW>=FsPEFuy1(&;D0VGv*vS;8G9FHf+u1iD11%S3UXIBrsO5-}aWkf2ac01je9aoV*;NOcB&H^_d~4JVDK21;r4?M6?f+q>Qn? nuC-7=U?^iABY6l-{Qy<@1bs05g~oEKC?e*Xe2H<|L~!dbi@?9b literal 0 HcmV?d00001 diff --git a/regression/jbmc-strings/long_string/Test.java b/regression/jbmc-strings/long_string/Test.java new file mode 100644 index 00000000000..63b0d69babc --- /dev/null +++ b/regression/jbmc-strings/long_string/Test.java @@ -0,0 +1,41 @@ +import org.cprover.CProverString; + +public class Test { + public static void check(String s, String t) { + // Filter + if(s == null || t == null) + return; + + // Act + String u = s.concat(t); + + // Filter out + if(u.length() < 3_000_000) + return; + if(CProverString.charAt(u, 500_000) != 'a') + return; + if(CProverString.charAt(u, 2_000_000) != 'b') + return; + + // Assert + assert(false); + } + + public static void checkAbort(String s, String t) { + // Filter + if(s == null || t == null) + return; + + // Act + String u = s.concat(t); + + // Filter out + if(u.length() < 67_108_864) + return; + if(CProverString.charAt(u, 2_000_000) != 'b') + return; + + // Assert + assert(false); + } +} diff --git a/regression/jbmc-strings/long_string/test.desc b/regression/jbmc-strings/long_string/test.desc new file mode 100644 index 00000000000..7f5301726d3 --- /dev/null +++ b/regression/jbmc-strings/long_string/test.desc @@ -0,0 +1,11 @@ +CORE +Test.class +--refine-strings --function Test.check --string-max-input-length 2000000 +^EXIT=10$ +^SIGNAL=0$ +assertion at file Test.java line 21 .* FAILURE +-- +-- +This checks that the solver manage to violate assertions even when this requires +some very long strings, as long as they don't exceed the arbitrary limit that +is set by the solver (64M characters). diff --git a/regression/jbmc-strings/long_string/test_abort.desc b/regression/jbmc-strings/long_string/test_abort.desc new file mode 100644 index 00000000000..22074e1ed1e --- /dev/null +++ b/regression/jbmc-strings/long_string/test_abort.desc @@ -0,0 +1,9 @@ +CORE +Test.class +--refine-strings --function Test.checkAbort +^EXIT=6$ +^SIGNAL=0$ +-- +-- +This tests should abort, because concretizing a string of the required +length may take to much memory. From 8277e92bb97b9cff1060efbc0d566fef45a889f4 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 4 Apr 2018 17:22:29 +0100 Subject: [PATCH 23/25] Stop using concretize_array_expr in unit tests We should use sparse arrays as in the solver. --- .../string_refinement/concretize_array.cpp | 49 +++++++++---------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/unit/solvers/refinement/string_refinement/concretize_array.cpp b/unit/solvers/refinement/string_refinement/concretize_array.cpp index a64303d2439..0b668de8fe8 100644 --- a/unit/solvers/refinement/string_refinement/concretize_array.cpp +++ b/unit/solvers/refinement/string_refinement/concretize_array.cpp @@ -1,7 +1,7 @@ /*******************************************************************\ - Module: Unit tests for concretize_array_expression in - solvers/refinement/string_refinement.cpp + Module: Unit tests for interval_sparse_arrayt::concretizein + solvers/refinement/string_refinement_util.cpp Author: Diffblue Ltd. @@ -18,6 +18,7 @@ SCENARIO("concretize_array_expression", "[core][solvers][refinement][string_refinement]") { + // Arrange const typet char_type=unsignedbv_typet(16); const typet int_type=signedbv_typet(32); const exprt index1=from_integer(1, int_type); @@ -27,33 +28,29 @@ SCENARIO("concretize_array_expression", const exprt index100=from_integer(100, int_type); const exprt char0=from_integer('0', char_type); const exprt index2=from_integer(2, int_type); + const exprt charz = from_integer('z', char_type); array_typet array_type(char_type, infinity_exprt(int_type)); // input_expr is - // `'0' + (ARRAY_OF(0) WITH [1:=x] WITH [4:=y] WITH [100:=z])[2]` - const plus_exprt input_expr( - from_integer('0', char_type), - index_exprt( + // ARRAY_OF(0) WITH [1:=x] WITH [4:=y] WITH [100:=z]` + const with_exprt input_expr( + with_exprt( with_exprt( - with_exprt( - with_exprt( - array_of_exprt(from_integer(0, char_type), array_type), - index1, - charx), - index4, - chary), - index100, - from_integer('z', char_type)), - index2)); - - // String max length is 50, so index 100 should get ignored. - symbol_tablet symbol_table; - namespacet ns(symbol_table); - const exprt concrete = concretize_arrays_in_expression(input_expr, 50, ns); - - // The expected result is `'0' + { 'x', 'x', 'y', 'y', 'y' }` - array_exprt array(array_type); - array.operands()={charx, charx, chary, chary, chary}; - const plus_exprt expected(char0, index_exprt(array, index2)); + array_of_exprt(from_integer(0, char_type), array_type), index1, charx), + index4, + chary), + index100, + charz); + + // Act + const interval_sparse_arrayt sparse_array(input_expr); + // String size is 7, so index 100 should get ignored. + const exprt concrete = sparse_array.concretize(7, int_type); + + // Assert + array_exprt expected(array_type); + // The expected result is `{ 'x', 'x', 'y', 'y', 'y', 'z', 'z' }` + expected.operands() = {charx, charx, chary, chary, chary, charz, charz}; + to_array_type(expected.type()).size() = from_integer(7, int_type); REQUIRE(concrete==expected); } From 5392645f2aeaaa599d12b56d2aebccecff4d9276 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 5 Apr 2018 14:47:23 +0100 Subject: [PATCH 24/25] Reserve size of array in concretize --- src/solvers/refinement/string_refinement_util.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index 6714a49af4f..edc998098ae 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -208,6 +208,7 @@ array_exprt interval_sparse_arrayt::concretize( const array_typet array_type( default_value.type(), from_integer(size, index_type)); array_exprt array(array_type); + array.operands().reserve(size); std::size_t current_index = 0; for(const auto &pair : entries) From 64b336a15024dcc80978c74e1df032a7f12565d8 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 6 Apr 2018 17:59:41 +0100 Subject: [PATCH 25/25] Refactor interval_sparse_array::concretize --- src/solvers/refinement/string_refinement_util.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index edc998098ae..d556151a7dd 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -210,16 +210,15 @@ array_exprt interval_sparse_arrayt::concretize( array_exprt array(array_type); array.operands().reserve(size); - std::size_t current_index = 0; - for(const auto &pair : entries) - { - for(; current_index <= pair.first && current_index < size; ++current_index) - array.operands().push_back(pair.second); - } - for(; current_index < size; ++current_index) - array.operands().push_back(default_value); + auto it = entries.begin(); + for(; it != entries.end() && it->first < size; ++it) + array.operands().resize(it->first + 1, it->second); + + array.operands().resize( + size, it == entries.end() ? default_value : it->second); return array; } + void equation_symbol_mappingt::add(const std::size_t i, const exprt &expr) { equations_containing[expr].push_back(i);