From 71f80e1fc4f88d3d9fce127fd8971b056bad0432 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 6 Jun 2018 07:35:12 +0100 Subject: [PATCH 1/5] Deprecates add_axioms_for_is_suffix We can get the same result using is_prefix with an offset which is the difference of the two lengths. --- src/solvers/README.md | 5 ++--- .../refinement/string_constraint_generator_testing.cpp | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/solvers/README.md b/src/solvers/README.md index 7a45f246662..e2f06856aff 100644 --- a/src/solvers/README.md +++ b/src/solvers/README.md @@ -231,9 +231,6 @@ allocates a new string before calling a primitive. * `cprover_string_is_prefix` : \copybrief string_constraint_generatort::add_axioms_for_is_prefix \link string_constraint_generatort::add_axioms_for_is_prefix More... \endlink - * `cprover_string_is_suffix` : - \copybrief string_constraint_generatort::add_axioms_for_is_suffix - \link string_constraint_generatort::add_axioms_for_is_suffix More... \endlink * `cprover_string_index_of` : \copybrief string_constraint_generatort::add_axioms_for_index_of(const function_application_exprt &f) \link string_constraint_generatort::add_axioms_for_index_of(const function_application_exprt &f) More... \endlink @@ -319,6 +316,8 @@ allocates a new string before calling a primitive. * `cprover_string_intern` : Never tested. * `cprover_string_is_empty` : Should use `cprover_string_length(s) == 0` instead. + * `cprover_string_is_suffix` : Should use `cprover_string_is_prefix` with an + offset argument. * `cprover_string_empty_string` : Can use literal of empty string instead. * `cprover_string_of_long` : Should be the same as `cprover_string_of_int`. * `cprover_string_delete_char_at` : A call to diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index 8c182f1dc49..e684b7123ba 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -135,6 +135,7 @@ exprt string_constraint_generatort::add_axioms_for_is_empty( /// \param swap_arguments: boolean flag telling whether the suffix is the second /// argument or the first argument /// \return Boolean expression `issuffix` +DEPRECATED("should use `strings_startwith(s0, s1, s1.length - s0.length)`") exprt string_constraint_generatort::add_axioms_for_is_suffix( const function_application_exprt &f, bool swap_arguments) { From 6763669941ec1d652e85307a544a7e6cf810ee81 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 6 Jun 2018 07:30:59 +0100 Subject: [PATCH 2/5] Correct starts_with for offset out of bound case The builtin function was not clearly specified for the case where the offset argument is negative, or exceeding the size of the string, and was not resilient to overflows. We now enforce that if the offset is out of bounds then the return value is false. The axioms added were not correct in the case of offset --- .../string_constraint_generator_testing.cpp | 67 +++++++++++-------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index e684b7123ba..dc827e951e0 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -15,16 +15,20 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include /// Add axioms stating that the returned expression is true exactly when the -/// first string is a prefix of the second one, starting at position offset. +/// offset is greater or equal to 0 and the first string is a prefix of the +/// second one, starting at position offset. /// /// These axioms are: -/// 1. \f$ {\tt isprefix} \Rightarrow |str| \ge |{\tt prefix}|+offset \f$ +/// 1. \f$ {\tt isprefix} \Rightarrow {\tt offset_within_bounds}\f$ /// 2. \f$ \forall 0 \le qvar<|{\tt prefix}|.\ {\tt isprefix} /// \Rightarrow s0[witness+{\tt offset}]=s2[witness] \f$ -/// 3. \f$ !{\tt isprefix} \Rightarrow |{\tt str}|<|{\tt prefix}|+{\tt offset} +/// 3. \f$ (\lnot {\tt isprefix} \Rightarrow +/// \lnot {\tt offset_within_bounds} /// \lor (0 \le witness<|{\tt prefix}| -/// \land {\tt str}[witness+{\tt offset}] \ne {\tt prefix}[witness])\f$ -/// +/// \land {\tt str}[witness+{\tt offset}] \ne {\tt prefix}[witness])\f$ +/// where \f$ {\tt offset_within_bounds} \f$ is +/// \f$ offset \ge 0 \land offset < |str| \land +/// |str| - |{\tt prefix}| \ge offset \f$ /// \param prefix: an array of characters /// \param str: an array of characters /// \param offset: an integer @@ -34,34 +38,39 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( const array_string_exprt &str, const exprt &offset) { - symbol_exprt isprefix=fresh_boolean("isprefix"); - const typet &index_type=str.length().type(); + const symbol_exprt isprefix = fresh_boolean("isprefix"); + const typet &index_type = str.length().type(); + const exprt offset_within_bounds = and_exprt( + binary_relation_exprt(offset, ID_ge, from_integer(0, offset.type())), + binary_relation_exprt(offset, ID_le, str.length()), + binary_relation_exprt( + minus_exprt(str.length(), offset), ID_ge, prefix.length())); // Axiom 1. - lemmas.push_back( - implies_exprt( - isprefix, str.axiom_for_length_ge(plus_exprt(prefix.length(), offset)))); - - symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); - string_constraintt a2( - qvar, - prefix.length(), - implies_exprt( - isprefix, equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar]))); - constraints.push_back(a2); - - symbol_exprt witness=fresh_exist_index("witness_not_isprefix", index_type); - and_exprt witness_diff( - axiom_for_is_positive_index(witness), - and_exprt( + lemmas.push_back(implies_exprt(isprefix, offset_within_bounds)); + + // Axiom 2. + constraints.push_back([&] { + const symbol_exprt qvar = fresh_univ_index("QA_isprefix", index_type); + const exprt body = implies_exprt( + isprefix, equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); + return string_constraintt(qvar, prefix.length(), body); + }()); + + // Axiom 3. + lemmas.push_back([&] { + const exprt witness = fresh_exist_index("witness_not_isprefix", index_type); + const exprt strings_differ_at_witness = and_exprt( + axiom_for_is_positive_index(witness), prefix.axiom_for_length_gt(witness), - notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness]))); - or_exprt s0_notpref_s1( - not_exprt(str.axiom_for_length_ge(plus_exprt(prefix.length(), offset))), - witness_diff); + notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness])); + const exprt s1_does_not_start_with_s0 = or_exprt( + not_exprt(offset_within_bounds), + not_exprt(str.axiom_for_length_ge(plus_exprt(prefix.length(), offset))), + strings_differ_at_witness); + return implies_exprt(not_exprt(isprefix), s1_does_not_start_with_s0); + }()); - implies_exprt a3(not_exprt(isprefix), s0_notpref_s1); - lemmas.push_back(a3); return isprefix; } From d599dfc83a87bf3fa7ef5b01c7a3552cc843887a Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 6 Jun 2018 17:55:40 +0100 Subject: [PATCH 3/5] Allow duplicate instantiations of the same constant string We normally don't want the same string to be accessed at two indexes in the same universal formula, but for constant strings this does not really matter, so we can allow it. In particular the previous invariant would have failed if we try to write `cprover_starts_with("foo", "foo", 1)`. --- src/solvers/refinement/string_refinement.cpp | 72 ++++++++------------ 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index cdc7f984d32..57be41871c1 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -1833,18 +1833,17 @@ static void update_index_set( } } -/// Finds an index on `str` used in `expr` that contains `qvar`, for instance -/// with arguments ``(str[k]=='a')``, `str`, and `k`, the function should -/// return `k`. -/// If two different such indexes exist, an invariant will fail. +/// Find indexes of `str` used in `expr` that contains `qvar`, for instance +/// with arguments ``(str[k+1]=='a')``, `str`, and `k`, the function should +/// return `k+1`. /// \param [in] expr: the expression to search /// \param [in] str: the string which must be indexed /// \param [in] qvar: the universal variable that must be in the index -/// \return an index expression in `expr` on `str` containing `qvar`. Returns -/// an empty optional when `expr` does not contain `str`. -static optionalt -find_index(const exprt &expr, const exprt &str, const symbol_exprt &qvar) +/// \return index expressions in `expr` on `str` containing `qvar`. +static std::unordered_set +find_indexes(const exprt &expr, const exprt &str, const symbol_exprt &qvar) { + decltype(find_indexes(expr, str, qvar)) result; auto index_str_containing_qvar = [&](const exprt &e) { if(auto index_expr = expr_try_dynamic_cast(e)) { @@ -1855,28 +1854,11 @@ find_index(const exprt &expr, const exprt &str, const symbol_exprt &qvar) return false; }; - auto it = std::find_if( - expr.depth_begin(), expr.depth_end(), index_str_containing_qvar); - if(it == expr.depth_end()) - return {}; - const exprt &index = to_index_expr(*it).index(); - - // Check that there are no two such indexes - it.next_sibling_or_parent(); - while(it != expr.depth_end()) - { - if(index_str_containing_qvar(*it)) - { - INVARIANT( - to_index_expr(*it).index() == index, - "string should always be indexed by same value in a given formula"); - it.next_sibling_or_parent(); - } - else - ++it; - } - - return index; + std::for_each(expr.depth_begin(), expr.depth_end(), [&](const exprt &e) { + if(index_str_containing_qvar(e)) + result.insert(to_index_expr(e).index()); + }); + return result; } /// Instantiates a string constraint by substituting the quantifiers. @@ -1897,18 +1879,24 @@ static exprt instantiate( const exprt &str, const exprt &val) { - const optionalt idx = find_index(axiom.body, str, axiom.univ_var); - if(!idx.has_value()) - return true_exprt(); - - const exprt r = compute_inverse_function(stream, axiom.univ_var, val, *idx); - implies_exprt instance( - and_exprt( - binary_relation_exprt(axiom.univ_var, ID_ge, axiom.lower_bound), - binary_relation_exprt(axiom.univ_var, ID_lt, axiom.upper_bound)), - axiom.body); - replace_expr(axiom.univ_var, r, instance); - return instance; + const auto indexes = find_indexes(axiom.body, str, axiom.univ_var); + INVARIANT( + str.id() == ID_array || indexes.size() <= 1, + "non constant array should always be accessed at the same index"); + exprt::operandst conjuncts; + for(const auto &index : indexes) + { + const exprt univ_var_value = + compute_inverse_function(stream, axiom.univ_var, val, index); + implies_exprt instance( + and_exprt( + binary_relation_exprt(axiom.univ_var, ID_ge, axiom.lower_bound), + binary_relation_exprt(axiom.univ_var, ID_lt, axiom.upper_bound)), + axiom.body); + replace_expr(axiom.univ_var, univ_var_value, instance); + conjuncts.push_back(instance); + } + return conjunction(conjuncts); } /// Instantiates a quantified formula representing `not_contains` by From 0c5615145b5e154444ee510c66dd4031a5efc5ed Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Wed, 6 Jun 2018 18:19:15 +0100 Subject: [PATCH 4/5] Add test for StartsWith One of this test is an exhaustive comparison with the result of a reference implementation. --- .../jbmc-strings/StartsWith/Test.class | Bin 0 -> 1443 bytes .../jbmc-strings/StartsWith/Test.java | 72 ++++++++++++++++++ .../jbmc-strings/StartsWith/test.desc | 8 ++ .../jbmc-strings/StartsWith/test_det.desc | 13 ++++ .../jbmc-strings/StartsWith/test_nondet.desc | 8 ++ .../string_constraint_generator_testing.cpp | 6 +- 6 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 jbmc/regression/jbmc-strings/StartsWith/Test.class create mode 100644 jbmc/regression/jbmc-strings/StartsWith/Test.java create mode 100644 jbmc/regression/jbmc-strings/StartsWith/test.desc create mode 100644 jbmc/regression/jbmc-strings/StartsWith/test_det.desc create mode 100644 jbmc/regression/jbmc-strings/StartsWith/test_nondet.desc diff --git a/jbmc/regression/jbmc-strings/StartsWith/Test.class b/jbmc/regression/jbmc-strings/StartsWith/Test.class new file mode 100644 index 0000000000000000000000000000000000000000..82850bfa7dc811ca517e67ec6304b65a1b0913f1 GIT binary patch literal 1443 zcmZ`(-*4Mg6#i~($ByGHO-DU!Ao1j;x3z`MEk@s=0elEKUndD~v(1+KT5 zI8hS6=O?=Y&E}2U&ON8ocY>YHW)k_qPWQi-jTQ@)c+S1^mUDNQP;+-Z_l|&(&R4x8 zg}}gKYPcB&scq_r3NoFs;`WEg&CPHxa=q)m#96rI#mS-sWnmJ>bzHRY3YK(SvapO- zExd-;EllG@3lpdaRLA(&{-Nkv6onDtY=apLUPCP1@0Q01!v8E0`aX3 z#MCedP*9L$4nsfXUZroj{xv4*_5pHj{(BFgDUX!ZhTMFEnq6;v^b|_F!QsAieXfXy zfBmLD;>lc^dmaYYI>8u(Q6vNlX9#lv$FRu#5NFMvje+aU17hxNqMdj*!8ru0VRKkswh#y*-?8hrNRJ}$ s.length() - prefix.length()) { + return false; + } + + for (int i = 0; i < prefix.length(); i++) { + if (org.cprover.CProverString.charAt(s, offset + i) + != org.cprover.CProverString.charAt(prefix, i)) { + return false; + } + } + return true; + } + + public static boolean check(String s, String t, int offset) { + // Filter out null strings + if(s == null || t == null) { + return false; + } + + // Act + final boolean result = s.startsWith(t, offset); + + // Assert + final boolean referenceResult = referenceStartsWith(s, t, offset); + assert(result == referenceResult); + + // Check reachability + assert(result == false); + return result; + } + + public static boolean checkDet() { + boolean result = false; + result = "foo".startsWith("foo", 0); + assert(result); + result = "foo".startsWith("f", -1); + assert(!result); + result = "foo".startsWith("oo", 1); + assert(result); + result = "foo".startsWith("f", 1); + assert(!result); + result = "foo".startsWith("bar", 0); + assert(!result); + result = "foo".startsWith("oo", 2); + assert(!result); + assert(false); + return result; + } + + public static boolean checkNonDet(String s) { + // Filter + if (s == null) { + return false; + } + + // Act + final boolean result = s.startsWith(s, 1); + + // Assert + assert(!result); + + // Check reachability + assert(false); + return result; + } +} diff --git a/jbmc/regression/jbmc-strings/StartsWith/test.desc b/jbmc/regression/jbmc-strings/StartsWith/test.desc new file mode 100644 index 00000000000..13bacc2677a --- /dev/null +++ b/jbmc/regression/jbmc-strings/StartsWith/test.desc @@ -0,0 +1,8 @@ +CORE +Test.class +--refine-strings --string-max-input-length 10 --unwind 10 --function Test.check +^EXIT=10$ +^SIGNAL=0$ +assertion at file Test.java line 31 .*: SUCCESS +assertion at file Test.java line 34 .*: FAILURE +-- diff --git a/jbmc/regression/jbmc-strings/StartsWith/test_det.desc b/jbmc/regression/jbmc-strings/StartsWith/test_det.desc new file mode 100644 index 00000000000..bb9017ada0d --- /dev/null +++ b/jbmc/regression/jbmc-strings/StartsWith/test_det.desc @@ -0,0 +1,13 @@ +CORE +Test.class +--refine-strings --string-max-input-length 100 --unwind 10 --function Test.checkDet +^EXIT=10$ +^SIGNAL=0$ +assertion at file Test.java line 41 .*: SUCCESS +assertion at file Test.java line 43 .*: SUCCESS +assertion at file Test.java line 45 .*: SUCCESS +assertion at file Test.java line 47 .*: SUCCESS +assertion at file Test.java line 49 .*: SUCCESS +assertion at file Test.java line 51 .*: SUCCESS +assertion at file Test.java line 52 .*: FAILURE +-- diff --git a/jbmc/regression/jbmc-strings/StartsWith/test_nondet.desc b/jbmc/regression/jbmc-strings/StartsWith/test_nondet.desc new file mode 100644 index 00000000000..b02b55f1509 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StartsWith/test_nondet.desc @@ -0,0 +1,8 @@ +CORE +Test.class +--refine-strings --string-max-input-length 100 --unwind 10 --function Test.checkNonDet +^EXIT=10$ +^SIGNAL=0$ +assertion at file Test.java line 66 .*: SUCCESS +assertion at file Test.java line 69 .*: FAILURE +-- diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index dc827e951e0..c77dab6af53 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -21,14 +21,14 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// These axioms are: /// 1. \f$ {\tt isprefix} \Rightarrow {\tt offset_within_bounds}\f$ /// 2. \f$ \forall 0 \le qvar<|{\tt prefix}|.\ {\tt isprefix} -/// \Rightarrow s0[witness+{\tt offset}]=s2[witness] \f$ +/// \Rightarrow s0[qvar+{\tt offset}]=s2[qvar] \f$ /// 3. \f$ (\lnot {\tt isprefix} \Rightarrow /// \lnot {\tt offset_within_bounds} /// \lor (0 \le witness<|{\tt prefix}| /// \land {\tt str}[witness+{\tt offset}] \ne {\tt prefix}[witness])\f$ /// where \f$ {\tt offset_within_bounds} \f$ is -/// \f$ offset \ge 0 \land offset < |str| \land -/// |str| - |{\tt prefix}| \ge offset \f$ +/// \f$ offset \ge 0 \land offset \le |str| \land +/// |str| - offset \ge |{\tt prefix}| \f$ /// \param prefix: an array of characters /// \param str: an array of characters /// \param offset: an integer From 7a6277d7f868cdb714ca3f446d87064f9bac7767 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 11 Jun 2018 08:40:43 +0100 Subject: [PATCH 5/5] Reformat documentation without latex This is much more readable in the code, and still looks good in doxygen. --- .../string_constraint_generator_testing.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index c77dab6af53..829c1d21887 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -19,16 +19,16 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// second one, starting at position offset. /// /// These axioms are: -/// 1. \f$ {\tt isprefix} \Rightarrow {\tt offset_within_bounds}\f$ -/// 2. \f$ \forall 0 \le qvar<|{\tt prefix}|.\ {\tt isprefix} -/// \Rightarrow s0[qvar+{\tt offset}]=s2[qvar] \f$ -/// 3. \f$ (\lnot {\tt isprefix} \Rightarrow -/// \lnot {\tt offset_within_bounds} -/// \lor (0 \le witness<|{\tt prefix}| -/// \land {\tt str}[witness+{\tt offset}] \ne {\tt prefix}[witness])\f$ -/// where \f$ {\tt offset_within_bounds} \f$ is -/// \f$ offset \ge 0 \land offset \le |str| \land -/// |str| - offset \ge |{\tt prefix}| \f$ +/// 1. isprefix => offset_within_bounds +/// 2. forall qvar in [0, |prefix|). +/// isprefix => str[qvar + offset] = prefix[qvar] +/// 3. !isprefix => !offset_within_bounds +/// || 0 <= witness < |prefix| +/// && str[witness+offset] != prefix[witness] +/// +/// where offset_within_bounds is: +/// offset >= 0 && offset <= |str| && |str| - offset >= |prefix| +/// /// \param prefix: an array of characters /// \param str: an array of characters /// \param offset: an integer