From 8c4801b0792ddc75d92c4ee2e2ab1b0c0dcc0d76 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 2 Aug 2018 13:57:50 +0100 Subject: [PATCH 01/14] Refactor add_axioms_for_to_lower_case --- .../refinement/string_constraint_generator.h | 3 + ...ng_constraint_generator_transformation.cpp | 98 +++++++++++-------- 2 files changed, 59 insertions(+), 42 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 342c563ac5a..5043dd235f6 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -175,6 +175,9 @@ class string_constraint_generatort final const array_string_exprt &str, const exprt &position, const exprt &character); + exprt add_axioms_for_to_lower_case( + const array_string_exprt &res, + const array_string_exprt &str); private: symbol_exprt fresh_boolean(const irep_idt &prefix); diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index a4bad2bdaf6..e7184380b11 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -251,55 +251,69 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case( const array_string_exprt res = char_array_of_pointer(f.arguments()[1], f.arguments()[0]); const array_string_exprt str = get_string_expr(f.arguments()[2]); - const refined_string_typet &ref_type = - to_refined_string_type(f.arguments()[2].type()); - const typet &char_type=ref_type.get_char_type(); - const typet &index_type=ref_type.get_index_type(); - const exprt char_A=constant_char('A', char_type); - const exprt char_Z=constant_char('Z', char_type); + return add_axioms_for_to_lower_case(res, str); +} + +exprt string_constraint_generatort::add_axioms_for_to_lower_case( + const array_string_exprt &res, + const array_string_exprt &str) +{ + const typet &char_type = res.type().subtype(); + const typet &index_type = res.length().type(); + const exprt char_A = from_integer('A', char_type); + const exprt char_Z = from_integer('Z', char_type); // \todo for now, only characters in Basic Latin and Latin-1 supplement // are supported (up to 0x100), we should add others using case mapping // information from the UnicodeData file. - equal_exprt a1(res.length(), str.length()); - lemmas.push_back(a1); + lemmas.push_back(equal_exprt(res.length(), str.length())); - symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type); - exprt::operandst upper_case; - // Characters between 'A' and 'Z' are upper-case - upper_case.push_back(and_exprt( - binary_relation_exprt(char_A, ID_le, str[idx]), - binary_relation_exprt(str[idx], ID_le, char_Z))); - - // Characters between 0xc0 (latin capital A with grave) - // and 0xd6 (latin capital O with diaeresis) are upper-case - upper_case.push_back(and_exprt( - binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]), - binary_relation_exprt(str[idx], ID_le, from_integer(0xd6, char_type)))); - - // Characters between 0xd8 (latin capital O with stroke) - // and 0xde (latin capital thorn) are upper-case - upper_case.push_back(and_exprt( - binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]), - binary_relation_exprt(str[idx], ID_le, from_integer(0xde, char_type)))); - - exprt is_upper_case=disjunction(upper_case); - - // The difference between upper-case and lower-case for the basic latin and - // latin-1 supplement is 0x20. - exprt diff=from_integer(0x20, char_type); - equal_exprt converted(res[idx], plus_exprt(str[idx], diff)); - and_exprt non_converted( - equal_exprt(res[idx], str[idx]), - binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type))); - if_exprt conditional_convert(is_upper_case, converted, non_converted); - - string_constraintt a2( - idx, zero_if_negative(res.length()), conditional_convert); - constraints.push_back(a2); + constraints.push_back([&] { + const symbol_exprt idx = fresh_univ_index("QA_lower_case", index_type); + + const exprt is_upper_case = disjunction([&] { + exprt::operandst upper_case; + // Characters between 'A' and 'Z' are upper-case + upper_case.push_back( + and_exprt( + binary_relation_exprt(char_A, ID_le, str[idx]), + binary_relation_exprt(str[idx], ID_le, char_Z))); + + // Characters between 0xc0 (latin capital A with grave) + // and 0xd6 (latin capital O with diaeresis) are upper-case + upper_case.push_back( + and_exprt( + binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]), + binary_relation_exprt( + str[idx], ID_le, from_integer(0xd6, char_type)))); + + // Characters between 0xd8 (latin capital O with stroke) + // and 0xde (latin capital thorn) are upper-case + upper_case.push_back( + and_exprt( + binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]), + binary_relation_exprt( + str[idx], ID_le, from_integer(0xde, char_type)))); + return upper_case; + }()); + + const exprt conditional_convert = [&] { + // The difference between upper-case and lower-case for the basic latin and + // latin-1 supplement is 0x20. + const exprt diff = from_integer(0x20, char_type); + const exprt converted = equal_exprt(res[idx], plus_exprt(str[idx], diff)); + const exprt non_converted = and_exprt( + equal_exprt(res[idx], str[idx]), + binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type))); + return if_exprt(is_upper_case, converted, non_converted); + }(); - return from_integer(0, f.type()); + return string_constraintt( + idx, zero_if_negative(res.length()), conditional_convert); + }()); + + return from_integer(0, get_return_code_type()); } /// Add axioms ensuring `res` corresponds to `str` in which lowercase characters From a71f2c0192516c09614d460a54df22213d656c2c Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 2 Aug 2018 14:06:27 +0100 Subject: [PATCH 02/14] Implement builtin string_to_lower_case function --- .../refinement/string_builtin_function.cpp | 19 +++++++++++ .../refinement/string_builtin_function.h | 33 +++++++++++++++++++ .../refinement/string_refinement_util.cpp | 4 +++ 3 files changed, 56 insertions(+) diff --git a/src/solvers/refinement/string_builtin_function.cpp b/src/solvers/refinement/string_builtin_function.cpp index 3739d6e85c2..ab3734cce41 100644 --- a/src/solvers/refinement/string_builtin_function.cpp +++ b/src/solvers/refinement/string_builtin_function.cpp @@ -187,6 +187,25 @@ exprt string_set_char_builtin_functiont::length_constraint() const equal_exprt(return_code, return_value)); } +optionalt string_to_lower_case_builtin_functiont::eval( + const std::function &get_value) const +{ + auto input_opt = eval_string(input, get_value); + if(!input_opt) + return {}; + for(mp_integer &c : input_opt.value()) + { + if( + ('A' <= c && c <= 'Z') || (0xc0 <= c && c <= 0xd6) || + (0xd8 <= c && c <= 0xde)) + c += 0x20; + } + const auto length = + from_integer(input_opt.value().size(), result.length().type()); + const array_typet type(result.type().subtype(), length); + return make_string(input_opt.value(), type); +} + std::vector string_insertion_builtin_functiont::eval( const std::vector &input1_value, const std::vector &input2_value, diff --git a/src/solvers/refinement/string_builtin_function.h b/src/solvers/refinement/string_builtin_function.h index 0480b5d3d65..685df17f650 100644 --- a/src/solvers/refinement/string_builtin_function.h +++ b/src/solvers/refinement/string_builtin_function.h @@ -177,6 +177,39 @@ class string_set_char_builtin_functiont exprt length_constraint() const override; }; +/// Converting each uppercase character of Basic Latin and Latin-1 supplement +/// to the corresponding lowercase character. +class string_to_lower_case_builtin_functiont + : public string_transformation_builtin_functiont +{ +public: + string_to_lower_case_builtin_functiont( + const exprt &return_code, + const std::vector &fun_args, + array_poolt &array_pool) + : string_transformation_builtin_functiont(return_code, fun_args, array_pool) + { + } + + optionalt + eval(const std::function &get_value) const override; + + std::string name() const override + { + return "to_lower_case"; + } + + exprt add_constraints(string_constraint_generatort &generator) const override + { + return generator.add_axioms_for_to_lower_case(result, input); + }; + + exprt length_constraint() const override + { + return equal_exprt(result.length(), input.length()); + }; +}; + /// String inserting a string into another one class string_insertion_builtin_functiont : public string_builtin_functiont { diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index 05791860b35..90eaa8b23ad 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -232,6 +232,10 @@ static std::unique_ptr to_string_builtin_function( return util_make_unique( return_code, fun_app.arguments(), array_pool); + if(id == ID_cprover_string_to_lower_case_func) + return util_make_unique( + return_code, fun_app.arguments(), array_pool); + return util_make_unique( return_code, fun_app, array_pool); } From 1137ffd066f09c6cbcce2c794d884c0a605b35ad Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 6 Aug 2018 08:24:01 +0100 Subject: [PATCH 03/14] Improve documentation of add_axioms_for_to_lower_case --- ...ng_constraint_generator_transformation.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index e7184380b11..45bd5e7647d 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -230,15 +230,7 @@ exprt string_constraint_generatort::add_axioms_for_trim( /// Conversion of a string to lower case /// -/// Add axioms ensuring `res` corresponds to `str` in which uppercase characters -/// have been converted to lowercase. -/// These axioms are: -/// 1. \f$ |{\tt res}| = |{\tt str}| \f$ -/// 2. \f$ \forall i<|{\tt str}|.\ {\tt is\_upper\_case}({\tt str}[i])? -/// {\tt res}[i]={\tt str}[i]+diff : {\tt res}[i]={\tt str}[i] -/// \land {\tt str}[i]<{\tt 0x100} \f$ -/// where `diff` is the difference between lower case and upper case -/// characters: `diff = 'a'-'A' = 0x20`. +/// \copydoc add_axioms_for_to_lower_case(const array_string_exprt &, array_string_exprt &) /// /// \param f: function application with arguments integer `|res|`, character /// pointer `&res[0]`, refined_string `str` @@ -254,6 +246,17 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case( return add_axioms_for_to_lower_case(res, str); } +/// Add axioms ensuring `res` corresponds to `str` in which uppercase characters +/// have been converted to lowercase. +/// These axioms are: +/// 1. res.length = str.length +/// 2. forall i < str.length. +/// res[i] = is_upper_case(str[i]) ? str[i] + diff : str[i] +/// && str[i] < 0x100 +/// +/// Where `diff` is the difference between lower case and upper case +/// characters: `diff = 'a'-'A' = 0x20` and `is_upper_case` is true for the +/// upper case characters of Basic Latin and Latin-1 supplement of unicode. exprt string_constraint_generatort::add_axioms_for_to_lower_case( const array_string_exprt &res, const array_string_exprt &str) From 836cbadea454a9fe3a3869bd156636a9161fbc4a Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 6 Aug 2018 09:02:43 +0100 Subject: [PATCH 04/14] Extract an is_upper_case function --- ...ng_constraint_generator_transformation.cpp | 62 ++++++++++--------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index 45bd5e7647d..ae05581993a 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -246,6 +246,39 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case( return add_axioms_for_to_lower_case(res, str); } +/// Expression which is true for uppercase characters of the Basic Latin and +/// Latin-1 supplement of unicode. +static exprt is_upper_case(const exprt &character) +{ + const exprt char_A = from_integer('A', character.type()); + const exprt char_Z = from_integer('Z', character.type()); + exprt::operandst upper_case; + // Characters between 'A' and 'Z' are upper-case + upper_case.push_back( + and_exprt( + binary_relation_exprt(char_A, ID_le, character), + binary_relation_exprt(character, ID_le, char_Z))); + + // Characters between 0xc0 (latin capital A with grave) + // and 0xd6 (latin capital O with diaeresis) are upper-case + upper_case.push_back( + and_exprt( + binary_relation_exprt( + from_integer(0xc0, character.type()), ID_le, character), + binary_relation_exprt( + character, ID_le, from_integer(0xd6, character.type())))); + + // Characters between 0xd8 (latin capital O with stroke) + // and 0xde (latin capital thorn) are upper-case + upper_case.push_back( + and_exprt( + binary_relation_exprt( + from_integer(0xd8, character.type()), ID_le, character), + binary_relation_exprt( + character, ID_le, from_integer(0xde, character.type())))); + return disjunction(upper_case); +} + /// Add axioms ensuring `res` corresponds to `str` in which uppercase characters /// have been converted to lowercase. /// These axioms are: @@ -274,33 +307,6 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case( constraints.push_back([&] { const symbol_exprt idx = fresh_univ_index("QA_lower_case", index_type); - - const exprt is_upper_case = disjunction([&] { - exprt::operandst upper_case; - // Characters between 'A' and 'Z' are upper-case - upper_case.push_back( - and_exprt( - binary_relation_exprt(char_A, ID_le, str[idx]), - binary_relation_exprt(str[idx], ID_le, char_Z))); - - // Characters between 0xc0 (latin capital A with grave) - // and 0xd6 (latin capital O with diaeresis) are upper-case - upper_case.push_back( - and_exprt( - binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]), - binary_relation_exprt( - str[idx], ID_le, from_integer(0xd6, char_type)))); - - // Characters between 0xd8 (latin capital O with stroke) - // and 0xde (latin capital thorn) are upper-case - upper_case.push_back( - and_exprt( - binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]), - binary_relation_exprt( - str[idx], ID_le, from_integer(0xde, char_type)))); - return upper_case; - }()); - const exprt conditional_convert = [&] { // The difference between upper-case and lower-case for the basic latin and // latin-1 supplement is 0x20. @@ -309,7 +315,7 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case( const exprt non_converted = and_exprt( equal_exprt(res[idx], str[idx]), binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type))); - return if_exprt(is_upper_case, converted, non_converted); + return if_exprt(is_upper_case(str[idx]), converted, non_converted); }(); return string_constraintt( From be477c98aaa95b6d044f84fc4a19d04f48b19cf4 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 6 Aug 2018 09:10:28 +0100 Subject: [PATCH 05/14] Remove assumptions that input char are < 0x100 This could conflict with other constraints and make SAT insatisfiable leading to false positive in verification. Instead it should be to the user of these builtin function to make sure they are used in a way that corresponds to the function they want. --- .../string_constraint_generator_transformation.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index ae05581993a..83dd730bd22 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -285,7 +285,6 @@ static exprt is_upper_case(const exprt &character) /// 1. res.length = str.length /// 2. forall i < str.length. /// res[i] = is_upper_case(str[i]) ? str[i] + diff : str[i] -/// && str[i] < 0x100 /// /// Where `diff` is the difference between lower case and upper case /// characters: `diff = 'a'-'A' = 0x20` and `is_upper_case` is true for the @@ -312,9 +311,7 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case( // latin-1 supplement is 0x20. const exprt diff = from_integer(0x20, char_type); const exprt converted = equal_exprt(res[idx], plus_exprt(str[idx], diff)); - const exprt non_converted = and_exprt( - equal_exprt(res[idx], str[idx]), - binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type))); + const exprt non_converted = equal_exprt(res[idx], str[idx]); return if_exprt(is_upper_case(str[idx]), converted, non_converted); }(); From 53ccd3f94e06302cc2be81165509752c40c6f2ca Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 2 Aug 2018 15:12:33 +0100 Subject: [PATCH 06/14] Refactor string_to_upper_case --- ...ng_constraint_generator_transformation.cpp | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index 83dd730bd22..933402dea82 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -353,29 +353,31 @@ exprt string_constraint_generatort::add_axioms_for_to_upper_case( // \todo Add support for locales using case mapping information // from the UnicodeData file. - equal_exprt a1(res.length(), str.length()); - lemmas.push_back(a1); - - symbol_exprt idx1=fresh_univ_index("QA_upper_case1", index_type); - const and_exprt is_lower_case( - binary_relation_exprt(char_a, ID_le, str[idx1]), - binary_relation_exprt(str[idx1], ID_le, char_z)); - minus_exprt diff(char_A, char_a); - equal_exprt convert(res[idx1], plus_exprt(str[idx1], diff)); - implies_exprt body1(is_lower_case, convert); - string_constraintt a2(idx1, zero_if_negative(res.length()), body1); - constraints.push_back(a2); + lemmas.push_back(equal_exprt(res.length(), str.length())); - symbol_exprt idx2=fresh_univ_index("QA_upper_case2", index_type); - const not_exprt is_not_lower_case( - and_exprt( - binary_relation_exprt(char_a, ID_le, str[idx2]), - binary_relation_exprt(str[idx2], ID_le, char_z))); - equal_exprt eq(res[idx2], str[idx2]); - implies_exprt body2(is_not_lower_case, eq); - string_constraintt a3(idx2, zero_if_negative(res.length()), body2); - constraints.push_back(a3); - return from_integer(0, signedbv_typet(32)); + constraints.push_back([&] { + const symbol_exprt idx1 = fresh_univ_index("QA_upper_case1", index_type); + const exprt is_lower_case = and_exprt( + binary_relation_exprt(char_a, ID_le, str[idx1]), + binary_relation_exprt(str[idx1], ID_le, char_z)); + const exprt diff = minus_exprt(char_A, char_a); + const exprt convert = equal_exprt(res[idx1], plus_exprt(str[idx1], diff)); + const exprt body = implies_exprt(is_lower_case, convert); + return string_constraintt(idx1, zero_if_negative(res.length()), body); + }()); + + constraints.push_back([&] { + const symbol_exprt idx2 = fresh_univ_index("QA_upper_case2", index_type); + const exprt is_not_lower_case = not_exprt( + and_exprt( + binary_relation_exprt(char_a, ID_le, str[idx2]), + binary_relation_exprt(str[idx2], ID_le, char_z))); + const exprt eq = equal_exprt(res[idx2], str[idx2]); + const exprt body2 = implies_exprt(is_not_lower_case, eq); + return string_constraintt(idx2, zero_if_negative(res.length()), body2); + }()); + + return from_integer(0, get_return_code_type()); } /// Conversion of a string to upper case From fe9071beb6bd0a3909f79cd8e2816bc20aa5dedc Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 6 Aug 2018 06:39:07 +0100 Subject: [PATCH 07/14] Make to_upper_case work for Latin-1 supplement --- ...ng_constraint_generator_transformation.cpp | 53 ++++++++----------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index 933402dea82..3a7c55975dc 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -279,6 +279,14 @@ static exprt is_upper_case(const exprt &character) return disjunction(upper_case); } +/// Expression which is true for lower_case characters of the Basic Latin and +/// Latin-1 supplement of unicode. +static exprt is_lower_case(const exprt &character) +{ + return is_upper_case( + minus_exprt(character, from_integer(0x20, character.type()))); +} + /// Add axioms ensuring `res` corresponds to `str` in which uppercase characters /// have been converted to lowercase. /// These axioms are: @@ -323,19 +331,14 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case( } /// Add axioms ensuring `res` corresponds to `str` in which lowercase characters -/// have been converted to uppercase. +/// of Basic Latin and Latin-1 supplement of unicode have been converted to +/// uppercase. /// /// These axioms are: -/// 1. \f$ |{\tt res}| = |{\tt str}| \f$ -/// 2. \f$ \forall i<|{\tt str}|.\ 'a'\le {\tt str}[i]\le 'z' -/// \Rightarrow {\tt res}[i]={\tt str}[i]+'A'-'a' \f$ -/// 3. \f$ \forall i<|{\tt str}|.\ \lnot ('a'\le {\tt str}[i] \le 'z') -/// \Rightarrow {\tt res}[i]={\tt str}[i] \f$ -/// Note that index expressions are only allowed in the body of universal -/// axioms, so we use a trivial premise and push our premise into the body. +/// 1. res.length = str.length +/// 2. forall i < str.length. +/// is_lower_case(str[i]) ? res[i] = str[i] - 0x20 : res[i] = str[i] /// -/// \todo We can reduce the number of constraints by merging -/// constraints 2 and 3. /// \param res: array of characters expression /// \param str: array of characters expression /// \return integer expression which is different from `0` when there is an @@ -350,31 +353,17 @@ exprt string_constraint_generatort::add_axioms_for_to_upper_case( exprt char_A=constant_char('A', char_type); exprt char_z=constant_char('z', char_type); - // \todo Add support for locales using case mapping information - // from the UnicodeData file. - lemmas.push_back(equal_exprt(res.length(), str.length())); constraints.push_back([&] { - const symbol_exprt idx1 = fresh_univ_index("QA_upper_case1", index_type); - const exprt is_lower_case = and_exprt( - binary_relation_exprt(char_a, ID_le, str[idx1]), - binary_relation_exprt(str[idx1], ID_le, char_z)); - const exprt diff = minus_exprt(char_A, char_a); - const exprt convert = equal_exprt(res[idx1], plus_exprt(str[idx1], diff)); - const exprt body = implies_exprt(is_lower_case, convert); - return string_constraintt(idx1, zero_if_negative(res.length()), body); - }()); - - constraints.push_back([&] { - const symbol_exprt idx2 = fresh_univ_index("QA_upper_case2", index_type); - const exprt is_not_lower_case = not_exprt( - and_exprt( - binary_relation_exprt(char_a, ID_le, str[idx2]), - binary_relation_exprt(str[idx2], ID_le, char_z))); - const exprt eq = equal_exprt(res[idx2], str[idx2]); - const exprt body2 = implies_exprt(is_not_lower_case, eq); - return string_constraintt(idx2, zero_if_negative(res.length()), body2); + const symbol_exprt idx = fresh_univ_index("QA_upper_case", index_type); + const exprt converted = + minus_exprt(str[idx], from_integer(0x20, char_type)); + return string_constraintt( + idx, + zero_if_negative(res.length()), + equal_exprt( + res[idx], if_exprt(is_lower_case(str[idx]), converted, str[idx]))); }()); return from_integer(0, get_return_code_type()); From e2961b5fe97a6e43b332315dffdb9d8cad2419b7 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 2 Aug 2018 15:16:15 +0100 Subject: [PATCH 08/14] Add builtin class for string_to_upper_case --- .../refinement/string_builtin_function.cpp | 27 +++++++++++++-- .../refinement/string_builtin_function.h | 33 +++++++++++++++++++ .../refinement/string_constraint_generator.h | 6 ++-- .../refinement/string_refinement_util.cpp | 4 +++ 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/src/solvers/refinement/string_builtin_function.cpp b/src/solvers/refinement/string_builtin_function.cpp index ab3734cce41..81f8951fb4e 100644 --- a/src/solvers/refinement/string_builtin_function.cpp +++ b/src/solvers/refinement/string_builtin_function.cpp @@ -187,6 +187,12 @@ exprt string_set_char_builtin_functiont::length_constraint() const equal_exprt(return_code, return_value)); } +static bool eval_is_upper_case(const mp_integer &c) +{ + return ('A' <= c && c <= 'Z') || (0xc0 <= c && c <= 0xd6) || + (0xd8 <= c && c <= 0xde); +} + optionalt string_to_lower_case_builtin_functiont::eval( const std::function &get_value) const { @@ -195,9 +201,7 @@ optionalt string_to_lower_case_builtin_functiont::eval( return {}; for(mp_integer &c : input_opt.value()) { - if( - ('A' <= c && c <= 'Z') || (0xc0 <= c && c <= 0xd6) || - (0xd8 <= c && c <= 0xde)) + if(eval_is_upper_case(c)) c += 0x20; } const auto length = @@ -206,6 +210,23 @@ optionalt string_to_lower_case_builtin_functiont::eval( return make_string(input_opt.value(), type); } +optionalt string_to_upper_case_builtin_functiont::eval( + const std::function &get_value) const +{ + auto input_opt = eval_string(input, get_value); + if(!input_opt) + return {}; + for(mp_integer &c : input_opt.value()) + { + if(eval_is_upper_case(c - 0x20)) + c -= 0x20; + } + const auto length = + from_integer(input_opt.value().size(), result.length().type()); + const array_typet type(result.type().subtype(), length); + return make_string(input_opt.value(), type); +} + std::vector string_insertion_builtin_functiont::eval( const std::vector &input1_value, const std::vector &input2_value, diff --git a/src/solvers/refinement/string_builtin_function.h b/src/solvers/refinement/string_builtin_function.h index 685df17f650..c7038f84ead 100644 --- a/src/solvers/refinement/string_builtin_function.h +++ b/src/solvers/refinement/string_builtin_function.h @@ -210,6 +210,39 @@ class string_to_lower_case_builtin_functiont }; }; +/// Converting each lowercase character of Basic Latin and Latin-1 supplement +/// to the corresponding uppercase character. +class string_to_upper_case_builtin_functiont + : public string_transformation_builtin_functiont +{ +public: + string_to_upper_case_builtin_functiont( + const exprt &return_code, + const std::vector &fun_args, + array_poolt &array_pool) + : string_transformation_builtin_functiont(return_code, fun_args, array_pool) + { + } + + optionalt + eval(const std::function &get_value) const override; + + std::string name() const override + { + return "to_upper_case"; + } + + exprt add_constraints(string_constraint_generatort &generator) const override + { + return generator.add_axioms_for_to_upper_case(result, input); + }; + + exprt length_constraint() const override + { + return equal_exprt(result.length(), input.length()); + }; +}; + /// String inserting a string into another one class string_insertion_builtin_functiont : public string_builtin_functiont { diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 5043dd235f6..a7d42c514d2 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -178,6 +178,9 @@ class string_constraint_generatort final exprt add_axioms_for_to_lower_case( const array_string_exprt &res, const array_string_exprt &str); + exprt add_axioms_for_to_upper_case( + const array_string_exprt &res, + const array_string_exprt &expr); private: symbol_exprt fresh_boolean(const irep_idt &prefix); @@ -338,9 +341,6 @@ class string_constraint_generatort final exprt add_axioms_for_substring(const function_application_exprt &f); exprt add_axioms_for_to_lower_case(const function_application_exprt &f); exprt add_axioms_for_to_upper_case(const function_application_exprt &f); - exprt add_axioms_for_to_upper_case( - const array_string_exprt &res, - const array_string_exprt &expr); exprt add_axioms_for_trim(const function_application_exprt &f); exprt add_axioms_for_code_point( diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index 90eaa8b23ad..85ac59305f5 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -236,6 +236,10 @@ static std::unique_ptr to_string_builtin_function( return util_make_unique( return_code, fun_app.arguments(), array_pool); + if(id == ID_cprover_string_to_upper_case_func) + return util_make_unique( + return_code, fun_app.arguments(), array_pool); + return util_make_unique( return_code, fun_app, array_pool); } From 579d00cbde049dfd5c0eeae2404e85aa11e4959b Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 2 Aug 2018 15:37:08 +0100 Subject: [PATCH 09/14] Remove redundant function application ID check These functions are already handled by the string dependency graph. They should not appear in the add_axioms_for_function_application input. --- .../refinement/string_constraint_generator_main.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 91ec22bb45a..597e909d6c7 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -456,10 +456,6 @@ exprt string_constraint_generatort::add_axioms_for_function_application( res=add_axioms_for_compare_to(expr); else if(id==ID_cprover_string_literal_func) res=add_axioms_from_literal(expr); - else if(id==ID_cprover_string_concat_func) - res=add_axioms_for_concat(expr); - else if(id==ID_cprover_string_concat_char_func) - res=add_axioms_for_concat_char(expr); else if(id==ID_cprover_string_concat_code_point_func) res=add_axioms_for_concat_code_point(expr); else if(id==ID_cprover_string_insert_func) @@ -480,18 +476,10 @@ exprt string_constraint_generatort::add_axioms_for_function_application( res=add_axioms_for_substring(expr); else if(id==ID_cprover_string_trim_func) res=add_axioms_for_trim(expr); - else if(id==ID_cprover_string_to_lower_case_func) - res=add_axioms_for_to_lower_case(expr); - else if(id==ID_cprover_string_to_upper_case_func) - res=add_axioms_for_to_upper_case(expr); - else if(id==ID_cprover_string_char_set_func) - res=add_axioms_for_char_set(expr); else if(id==ID_cprover_string_empty_string_func) res=add_axioms_for_empty_string(expr); else if(id==ID_cprover_string_copy_func) res=add_axioms_for_copy(expr); - else if(id==ID_cprover_string_of_int_func) - res=add_axioms_from_int(expr); else if(id==ID_cprover_string_of_int_hex_func) res=add_axioms_from_int_hex(expr); else if(id==ID_cprover_string_of_float_func) From 71de2e3236315e51cd592445100ef2089f0ec98a Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 2 Aug 2018 16:03:45 +0100 Subject: [PATCH 10/14] Tests for String.toLowerCase --- .../jbmc-strings/StringToLowerCase/Test.class | Bin 0 -> 1713 bytes .../jbmc-strings/StringToLowerCase/Test.java | 50 ++++++++++++++++++ .../StringToLowerCase/test_dependency.desc | 10 ++++ .../StringToLowerCase/test_det.desc | 10 ++++ .../StringToLowerCase/test_nondet.desc | 10 ++++ 5 files changed, 80 insertions(+) create mode 100644 jbmc/regression/jbmc-strings/StringToLowerCase/Test.class create mode 100644 jbmc/regression/jbmc-strings/StringToLowerCase/Test.java create mode 100644 jbmc/regression/jbmc-strings/StringToLowerCase/test_dependency.desc create mode 100644 jbmc/regression/jbmc-strings/StringToLowerCase/test_det.desc create mode 100644 jbmc/regression/jbmc-strings/StringToLowerCase/test_nondet.desc diff --git a/jbmc/regression/jbmc-strings/StringToLowerCase/Test.class b/jbmc/regression/jbmc-strings/StringToLowerCase/Test.class new file mode 100644 index 0000000000000000000000000000000000000000..800af2c1930e05846f6e56e3663d2bfaef7c440f GIT binary patch literal 1713 zcmah}T~pge6g_K4R)7M&L+9lA-B8 z36Fj0V`)3%%rrB}TOazHI-TB?C5wV*>WB5+y;t{~d+*u(>z_Y90+>fd#we!Zn3fU2 zl!O@>vzQZ`FD2ZR@Rfvl315qY1>yQe!nYD`33E}xZ3#=l{7%B1K4frL!o4^OGKNIX zWf?`3;#gt0cvW*8Zo7tMIwix=stv9)ux*Cel3^O|9fqFVOa z+nXYo+&Xs|(z(g z8alTb=Cx`~=R5UXV{gCl+O!Vd*pAygJo@47kA>x8X=U~PgNKhEm)F)eo;-b4*?j(D z>*cF$0#|S!;|d<&VUQzCu_IEv2~r#eDOUcI`Dh9ri7FqX%%D_uE!)jH-tb981qEwZXBchEA!}N~sgc;sdRB}zTcD^# z1sftNb1I649mXj53}Y0iQA{|cLLl1}+qP&-Q8Cdto+x;VXAG$@a-+J(Yb3!_M{OvE zglmJg{kp7T%u|YSJVMynWVROAxFWnEfNe;S}+qgn^dz9V_mdc1Ok<|+(6iw^rw7W$A zOq||$5B94FM3&Z=cPLy}Fixuowdf^@(Er^Jh`f4_o^oD|w$R%`Y%8zEJC@Y3`Z|^z zSS=(@C`B9OGU8-MArU8)AjfDwLAyEhVG%M#eSxb!_%8(AL($*oTToKzpAb`%CwcWk z3;oArr%rrQYf}{9aS9D}KyVa$TEoqmD4$xytA7DW0_!ItDJU4gAQ8+UO};@)(i?db z!?-1$O^jfb8oWWxeMtpe^9qV638FoL>puAtA|EBsPX13Q=L%3o0T+V;1WUgJVzfT? z4x_&zeT@9HI@kimj(@-qwSV}Ncbsk?ze6H8o)!&mS202SLGw#eBU9AM5#l$79I?-1 z0aJAOW}s8M1#1Pp$w=aF%*Bx#q9IInBr;D`h~I#kX<_6T@#xQp_WX{tkV_jVkc&vv Oi->YhDCdUf&i@O9T!+yB literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc-strings/StringToLowerCase/Test.java b/jbmc/regression/jbmc-strings/StringToLowerCase/Test.java new file mode 100644 index 00000000000..3caa33d3377 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringToLowerCase/Test.java @@ -0,0 +1,50 @@ +public class Test { + public String det() + { + StringBuilder builder = new StringBuilder(); + builder.append("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".toLowerCase()); + builder.append("abcdeghijklmnopfrstuqwxyzABCDvFGHIJKLMENOPQRSTUVWXYZ".toLowerCase()); + builder.append("abcdeghijlmnopqrstvwxyzABCDEFGHIJuKLMNOPQRSfkTUVWXYZ".toLowerCase()); + builder.append("acdefghijklmnopqrsuvwxyzABCDEFbGHIJKLMNOPtQRSTUVWXYZ".toLowerCase()); + builder.append("abcdfghijklmnopqrstuvwxyzABCDEFGHIJeKLMNOPQRSTUVWXYZ".toLowerCase()); + return builder.toString(); + } + + public String nonDet(String s) + { + if(s.length() < 20) + return "Short string"; + if(!s.startsWith("A")) + return "String not starting with A"; + + StringBuilder builder = new StringBuilder(); + builder.append(s.toLowerCase()); + builder.append(s.toLowerCase()); + builder.append(":"); + builder.append(s); + builder.append(":"); + builder.append(s.toLowerCase()); + builder.append(s.toLowerCase()); + return builder.toString(); + } + + public String withDependency(String s, boolean b) + { + // Filter + if(s == null || s.length() < 20) + return "Short string"; + if(!s.endsWith("A")) + return "String not ending with A"; + + // Act + String result = s + s.toLowerCase(); + + // Assert + if(b) { + assert(result.endsWith("a")); + } else { + assert(!result.endsWith("a")); + } + return result; + } +} diff --git a/jbmc/regression/jbmc-strings/StringToLowerCase/test_dependency.desc b/jbmc/regression/jbmc-strings/StringToLowerCase/test_dependency.desc new file mode 100644 index 00000000000..6015585d6e8 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringToLowerCase/test_dependency.desc @@ -0,0 +1,10 @@ +CORE +Test.class +--function Test.withDependency --verbosity 10 --max-nondet-string-length 10000 +^EXIT=10$ +^SIGNAL=0$ +assertion at file Test.java line 44 .*: SUCCESS +assertion at file Test.java line 46 .*: FAILURE +-- +-- +Check that when there are dependencies, axioms are adde correctly. diff --git a/jbmc/regression/jbmc-strings/StringToLowerCase/test_det.desc b/jbmc/regression/jbmc-strings/StringToLowerCase/test_det.desc new file mode 100644 index 00000000000..eadefb44241 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringToLowerCase/test_det.desc @@ -0,0 +1,10 @@ +CORE +Test.class +--function Test.det --verbosity 10 --cover location +^EXIT=0$ +^SIGNAL=0$ +coverage.* file Test.java line 9 .*: SATISFIED +-- +adding lemma .*nondet_infinite_array +-- +Check that using the string dependence informations, no lemma involving arrays is added diff --git a/jbmc/regression/jbmc-strings/StringToLowerCase/test_nondet.desc b/jbmc/regression/jbmc-strings/StringToLowerCase/test_nondet.desc new file mode 100644 index 00000000000..30ef54cd492 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringToLowerCase/test_nondet.desc @@ -0,0 +1,10 @@ +CORE +Test.class +--function Test.nonDet --verbosity 10 --cover location --max-nondet-string-length 1000 +^EXIT=0$ +^SIGNAL=0$ +coverage.* file Test.java line 27 .*: SATISFIED +-- +adding lemma .*nondet_infinite_array +-- +Check that using the string dependence informations, no lemma involving arrays is added From 58307a04e035d1aaf92e3a5f934cf06099200d85 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 2 Aug 2018 16:06:20 +0100 Subject: [PATCH 11/14] Tests for String.toUpperCase --- .../jbmc-strings/StringToUpperCase/Test.class | Bin 0 -> 1737 bytes .../jbmc-strings/StringToUpperCase/Test.java | 50 ++++++++++++++++++ .../StringToUpperCase/test_dependency.desc | 10 ++++ .../StringToUpperCase/test_det.desc | 10 ++++ .../StringToUpperCase/test_nondet.desc | 10 ++++ 5 files changed, 80 insertions(+) create mode 100644 jbmc/regression/jbmc-strings/StringToUpperCase/Test.class create mode 100644 jbmc/regression/jbmc-strings/StringToUpperCase/Test.java create mode 100644 jbmc/regression/jbmc-strings/StringToUpperCase/test_dependency.desc create mode 100644 jbmc/regression/jbmc-strings/StringToUpperCase/test_det.desc create mode 100644 jbmc/regression/jbmc-strings/StringToUpperCase/test_nondet.desc diff --git a/jbmc/regression/jbmc-strings/StringToUpperCase/Test.class b/jbmc/regression/jbmc-strings/StringToUpperCase/Test.class new file mode 100644 index 0000000000000000000000000000000000000000..91c3f958e3a3d2addf9aee2ac260df2107ce9651 GIT binary patch literal 1737 zcmah}T~pgu5IxrzOUQCCip^I@>V}j+jDZkJ8$v_ChJ?gG67!KT(+AnU7=a@ZNrtBX zBs}(|kEQL5GtU6qSmMsdNsUD=gd-v|1vwN=o`sa^N0A{eQ;2JU#CKQB_ zkuj-Y3U|d~TE;!Gn33_dj9D4q2=BMTbzjDuj0eJ;m+_s9hr(Qt@u&*}Sd_6OA*-NI zB+e;V#)^c;3|DXIj>B!&uuLasIC`z&yANlF8 zoEYwYjvsNe%+1m}hTCm2?Y2oJ;R%C0U#j{vl!8^WOMJx;pu0A?<4y>`DsJElhQ5p7 zmug0}%x#7ly;v&qy~@6Ea9Dj~T1Rhf$E_Wo{P6C_#ieX+dFAnw)u+$$YwH`&U%V`A zZf)ki4<>}DUO>dmj9DOMIOWF&$g;xC|+w--#hf{i({8i zp9ftGP`d$azuNzh<*j;tH5F@u>N++U)WW`HyD7(004a$Y{2VVB1{<14nU>hpNpWT+ zC1#t_DQZr|OA*z7E{Y}|#Hbj;bqZXhm|#?kP35sMy37Lp+GwC?4<< z>F}JZQHmkzT6yb)+gaTqWkmENp*4m~8=YO2nJ*KPI;Gj7qkbCMwVL@ORc=Z z=owj^U_#{dPSNT*eVOpy`2hAS8H|nq5zi^MZep0;De_tLa|zLR;Uhw?KcFL@*1~mk z))CoBYf>F@!|G~TO3R8itvb{*O3^5C0}|O`$b?Cwl0&o}q16<+Fb9RAZs9Ax_%Bqt zgQCAr*Aa^+enLdMa+cP*>xiF{-E-!HS{tVT4@qEXnm{Jj^bQsmqI_r}kN+%URIHnT z#G#@GNdnl91oa__G4x`Zex3Kpvq0@#!5}uM$*+jOh({=-MhW(9jQZfu2z;1!_R@bs zy+q)uAdqMh5GZ{LglK)}JqCY6@)YTe)>{X~-u#F@YJdM{&z)$v-y_;|XGDV=EJg@F zXo4}CVVs&dK Date: Mon, 6 Aug 2018 15:15:35 +0100 Subject: [PATCH 12/14] Document eval_string --- src/solvers/refinement/string_builtin_function.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/solvers/refinement/string_builtin_function.cpp b/src/solvers/refinement/string_builtin_function.cpp index 81f8951fb4e..bb394347512 100644 --- a/src/solvers/refinement/string_builtin_function.cpp +++ b/src/solvers/refinement/string_builtin_function.cpp @@ -8,7 +8,9 @@ #include "string_constraint_generator.h" -/// Get the valuation of the string, given a valuation +/// Given a function `get_value` which gives a valuation to expressions, attempt +/// to find the current value of the array `a`. If the valuation of some +/// characters are missing, then return an empty optional. static optionalt> eval_string( const array_string_exprt &a, const std::function &get_value); From ecc0e43afaa179a73206293e32196459d0473051 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 6 Aug 2018 15:59:20 +0100 Subject: [PATCH 13/14] Document eval_is_upper_case --- src/solvers/refinement/string_builtin_function.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/solvers/refinement/string_builtin_function.cpp b/src/solvers/refinement/string_builtin_function.cpp index bb394347512..d9e67a685aa 100644 --- a/src/solvers/refinement/string_builtin_function.cpp +++ b/src/solvers/refinement/string_builtin_function.cpp @@ -191,6 +191,11 @@ exprt string_set_char_builtin_functiont::length_constraint() const static bool eval_is_upper_case(const mp_integer &c) { + // Characters between 'A' and 'Z' are upper-case + // Characters between 0xc0 (latin capital A with grave) + // and 0xd6 (latin capital O with diaeresis) are upper-case + // Characters between 0xd8 (latin capital O with stroke) + // and 0xde (latin capital thorn) are upper-case return ('A' <= c && c <= 'Z') || (0xc0 <= c && c <= 0xd6) || (0xd8 <= c && c <= 0xde); } From a44468bea3d1ff3a482bb3528c0cb0058253f549 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 6 Aug 2018 15:59:35 +0100 Subject: [PATCH 14/14] Document string_builtin_function::eval --- src/solvers/refinement/string_builtin_function.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/solvers/refinement/string_builtin_function.h b/src/solvers/refinement/string_builtin_function.h index c7038f84ead..72e526ab102 100644 --- a/src/solvers/refinement/string_builtin_function.h +++ b/src/solvers/refinement/string_builtin_function.h @@ -30,6 +30,10 @@ class string_builtin_functiont return {}; } + /// Given a function `get_value` which gives a valuation to expressions, + /// attempt to find the result of the builtin function. + /// If not enough information can be gathered from `get_value`, return an + /// empty optional. virtual optionalt eval(const std::function &get_value) const = 0;