From 379705fbd2fe3122aaab6eb78dc2096a33c74d79 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 5 Feb 2018 18:14:56 +0000 Subject: [PATCH 1/3] Process array_equal the same way as array_{replace,copy} array_equal was handled as a special snowflake in both goto-program conversion and symbolic execution. This simplifies the code and removes a sharing-unaware pass that ran over all expressions. This commit is only refactoring, no behavioural changes expected. This commit also moves a regression test from cbmc-from-CVS to the cbmc folder, and extends it to cover array_replace. --- .../cbmc-from-CVS/Array_operations1/main.c | 37 ------------ .../cbmc-from-CVS/Array_operations1/test.desc | 8 --- regression/cbmc/Array_operations1/main.c | 57 +++++++++++++++++++ regression/cbmc/Array_operations1/test.desc | 10 ++++ src/goto-programs/builtin_functions.cpp | 51 ++--------------- src/goto-symex/goto_symex.h | 3 +- src/goto-symex/symex_clean_expr.cpp | 27 --------- src/goto-symex/symex_other.cpp | 52 +++++++++++++++++ 8 files changed, 126 insertions(+), 119 deletions(-) delete mode 100644 regression/cbmc-from-CVS/Array_operations1/main.c delete mode 100644 regression/cbmc-from-CVS/Array_operations1/test.desc create mode 100644 regression/cbmc/Array_operations1/main.c create mode 100644 regression/cbmc/Array_operations1/test.desc diff --git a/regression/cbmc-from-CVS/Array_operations1/main.c b/regression/cbmc-from-CVS/Array_operations1/main.c deleted file mode 100644 index 48825f6bebf..00000000000 --- a/regression/cbmc-from-CVS/Array_operations1/main.c +++ /dev/null @@ -1,37 +0,0 @@ -void testA() -{ - char arrayA1[100], arrayA2[100]; - _Bool cmp; - int index; - - cmp=__CPROVER_array_equal(arrayA1, arrayA2); - - __CPROVER_assume(index>=0 && index<100); - __CPROVER_assume(cmp); - - __CPROVER_assert(arrayA1[index]==arrayA2[index], "testA arrays are equal"); -} - -void testB() -{ - char arrayB1[100], arrayB2[100]; - - arrayB2[10]=11; - __CPROVER_array_copy(arrayB1, arrayB2); - - __CPROVER_assert(arrayB1[10]==11, "arrayB1[10] is OK"); -} - -void testC() -{ - char arrayC1[100]; - __CPROVER_array_set(arrayC1, 111); - __CPROVER_assert(arrayC1[44]==111, "arrayC1[44] is OK"); -} - -int main() -{ - testA(); - testB(); - testC(); -} diff --git a/regression/cbmc-from-CVS/Array_operations1/test.desc b/regression/cbmc-from-CVS/Array_operations1/test.desc deleted file mode 100644 index 9efefbc7362..00000000000 --- a/regression/cbmc-from-CVS/Array_operations1/test.desc +++ /dev/null @@ -1,8 +0,0 @@ -CORE -main.c - -^EXIT=0$ -^SIGNAL=0$ -^VERIFICATION SUCCESSFUL$ --- -^warning: ignoring diff --git a/regression/cbmc/Array_operations1/main.c b/regression/cbmc/Array_operations1/main.c new file mode 100644 index 00000000000..94e6dda43e6 --- /dev/null +++ b/regression/cbmc/Array_operations1/main.c @@ -0,0 +1,57 @@ +void test_equal() +{ + char array1[100], array2[100]; + _Bool cmp; + int index; + + cmp = __CPROVER_array_equal(array1, array2); + + __CPROVER_assume(index >= 0 && index < 100); + __CPROVER_assume(cmp); + + __CPROVER_assert(array1[index] == array2[index], "arrays are equal"); +} + +void test_copy() +{ + char array1[100], array2[100], array3[90]; + + array2[10] = 11; + array3[10] = 11; + array2[99] = 42; + __CPROVER_array_copy(array1, array2); + + __CPROVER_assert(array1[10] == 11, "array1[10] is OK"); + __CPROVER_assert(array1[99] == 42, "array1[99] is OK"); + + __CPROVER_array_copy(array1, array3); + __CPROVER_assert(array1[10] == 11, "array1[10] is OK"); + __CPROVER_assert(array1[99] == 42, "expected to fail"); +} + +void test_replace() +{ + char array1[100], array2[90]; + + array1[99] = 42; + array2[10] = 11; + __CPROVER_array_replace(array1, array2); + + __CPROVER_assert(array1[10] == 11, "array1[10] is OK"); + __CPROVER_assert(array1[99] == 42, "array1[99] is OK"); +} + +void test_set() +{ + char array1[100]; + __CPROVER_array_set(array1, 111); + __CPROVER_assert(array1[44] == 111, "array1[44] is OK"); +} + +int main() +{ + test_equal(); + test_copy(); + test_replace(); + test_set(); +} diff --git a/regression/cbmc/Array_operations1/test.desc b/regression/cbmc/Array_operations1/test.desc new file mode 100644 index 00000000000..2bbe617db7a --- /dev/null +++ b/regression/cbmc/Array_operations1/test.desc @@ -0,0 +1,10 @@ +CORE +main.c + +^EXIT=10$ +^SIGNAL=0$ +^\[test_copy\.assertion\.4\] expected to fail: FAILURE$ +^\*\* 1 of 8 failed +^VERIFICATION FAILED$ +-- +^warning: ignoring diff --git a/src/goto-programs/builtin_functions.cpp b/src/goto-programs/builtin_functions.cpp index 60fe569f845..541861fc662 100644 --- a/src/goto-programs/builtin_functions.cpp +++ b/src/goto-programs/builtin_functions.cpp @@ -839,51 +839,12 @@ void goto_convertt::do_array_op( array_op_statement.operands()=arguments; array_op_statement.add_source_location()=function.source_location(); - copy(array_op_statement, OTHER, dest); -} - -void goto_convertt::do_array_equal( - const exprt &lhs, - const exprt &function, - const exprt::operandst &arguments, - goto_programt &dest) -{ - if(arguments.size()!=2) - { - error().source_location=function.find_source_location(); - error() << "array_equal expects two arguments" << eom; - throw 0; - } - - const typet &arg0_type=ns.follow(arguments[0].type()); - const typet &arg1_type=ns.follow(arguments[1].type()); + // lhs is only used with array_equal, in all other cases it should be nil (as + // they are of type void) + if(id == ID_array_equal) + array_op_statement.copy_to_operands(lhs); - if(arg0_type.id()!=ID_pointer || - arg1_type.id()!=ID_pointer) - { - error().source_location=function.find_source_location(); - error() << "array_equal expects pointer arguments" << eom; - throw 0; - } - - if(lhs.is_not_nil()) - { - code_assignt assignment; - - // add dereferencing here - dereference_exprt lhs_array, rhs_array; - lhs_array.op0()=arguments[0]; - rhs_array.op0()=arguments[1]; - lhs_array.type()=arg0_type.subtype(); - rhs_array.type()=arg1_type.subtype(); - - assignment.lhs()=lhs; - assignment.rhs()=binary_exprt( - lhs_array, ID_array_equal, rhs_array, lhs.type()); - assignment.add_source_location()=function.source_location(); - - convert(assignment, dest); - } + copy(array_op_statement, OTHER, dest); } bool is_lvalue(const exprt &expr) @@ -1214,7 +1175,7 @@ void goto_convertt::do_function_call_symbol( } else if(identifier==CPROVER_PREFIX "array_equal") { - do_array_equal(lhs, function, arguments, dest); + do_array_op(ID_array_equal, lhs, function, arguments, dest); } else if(identifier==CPROVER_PREFIX "array_set") { diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index f27a1f1ffac..ddf17a90144 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -245,11 +245,10 @@ class goto_symext // this does the following: // a) rename non-det choices // b) remove pointer dereferencing - // c) rewrite array_equal expression into equality + // c) clean up byte_extract on the lhs of an assignment void clean_expr( exprt &, statet &, bool write); - void replace_array_equal(exprt &); void trigger_auto_object(const exprt &, statet &); void initialize_auto_object(const exprt &, statet &); void process_array_expr(exprt &); diff --git a/src/goto-symex/symex_clean_expr.cpp b/src/goto-symex/symex_clean_expr.cpp index 5179c5f88f1..c44174eb03a 100644 --- a/src/goto-symex/symex_clean_expr.cpp +++ b/src/goto-symex/symex_clean_expr.cpp @@ -143,31 +143,6 @@ void goto_symext::process_array_expr(exprt &expr) process_array_expr(*it); } -void goto_symext::replace_array_equal(exprt &expr) -{ - if(expr.id()==ID_array_equal) - { - assert(expr.operands().size()==2); - - // we expect two index expressions - process_array_expr(expr.op0()); - process_array_expr(expr.op1()); - - // type checking - if(ns.follow(expr.op0().type())!= - ns.follow(expr.op1().type())) - expr=false_exprt(); - else - { - equal_exprt equality_expr(expr.op0(), expr.op1()); - expr.swap(equality_expr); - } - } - - Forall_operands(it, expr) - replace_array_equal(*it); -} - /// Rewrite index/member expressions in byte_extract to offset static void adjust_byte_extract_rec(exprt &expr, const namespacet &ns) { @@ -203,6 +178,4 @@ void goto_symext::clean_expr( // lhs if(write) adjust_byte_extract_rec(expr, ns); - - replace_array_equal(expr); } diff --git a/src/goto-symex/symex_other.cpp b/src/goto-symex/symex_other.cpp index 03b0ab5f211..74b6a62e35a 100644 --- a/src/goto-symex/symex_other.cpp +++ b/src/goto-symex/symex_other.cpp @@ -14,6 +14,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -259,6 +260,57 @@ void goto_symext::symex_other( assignment.rhs()=array_of_exprt(clean_code.op1(), array_type); symex_assign(state, assignment); } + else if(statement==ID_array_equal) + { + DATA_INVARIANT( + code.operands().size() == 3, + "array_equal expected to take three arguments"); + + codet clean_code(code); + + // we need to add dereferencing for the first two operands + dereference_exprt d0, d1; + d0.op0()=code.op0(); + d0.type()=empty_typet(); + d1.op0()=code.op1(); + d1.type()=empty_typet(); + + clean_code.op0()=d0; + clean_code.op1()=d1; + + clean_expr(clean_code.op0(), state, false); + exprt op0_offset=from_integer(0, index_type()); + if(clean_code.op0().id()==byte_extract_id() && + clean_code.op0().type().id()==ID_empty) + { + op0_offset=to_byte_extract_expr(clean_code.op0()).offset(); + clean_code.op0()=clean_code.op0().op0(); + } + clean_expr(clean_code.op1(), state, false); + exprt op1_offset=from_integer(0, index_type()); + if(clean_code.op1().id()==byte_extract_id() && + clean_code.op1().type().id()==ID_empty) + { + op1_offset=to_byte_extract_expr(clean_code.op1()).offset(); + clean_code.op1()=clean_code.op1().op0(); + } + + process_array_expr(clean_code.op0()); + clean_expr(clean_code.op0(), state, false); + process_array_expr(clean_code.op1()); + clean_expr(clean_code.op1(), state, false); + + if(!base_type_eq(clean_code.op0().type(), + clean_code.op1().type(), ns)) + { + clean_code.op2() = false_exprt(); + } + + code_assignt assignment( + clean_code.op2(), + equal_exprt(clean_code.op0(), clean_code.op1())); + symex_assign(state, assignment); + } else if(statement==ID_user_specified_predicate || statement==ID_user_specified_parameter_predicates || statement==ID_user_specified_return_predicates) From f4fb09948a37c8e36572af7c5566b8ff076b35b7 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 5 Feb 2018 18:15:26 +0000 Subject: [PATCH 2/3] Fix process_array_expr to take into account pointer offset --- regression/cbmc/memcpy2/main.c | 18 +++ regression/cbmc/memcpy2/test.desc | 8 + src/goto-symex/goto_symex.h | 1 - src/goto-symex/symex_clean_expr.cpp | 163 +++++++++------------ src/goto-symex/symex_other.cpp | 219 +++++++++++----------------- 5 files changed, 176 insertions(+), 233 deletions(-) create mode 100644 regression/cbmc/memcpy2/main.c create mode 100644 regression/cbmc/memcpy2/test.desc diff --git a/regression/cbmc/memcpy2/main.c b/regression/cbmc/memcpy2/main.c new file mode 100644 index 00000000000..a2737fafd4a --- /dev/null +++ b/regression/cbmc/memcpy2/main.c @@ -0,0 +1,18 @@ +#include +#include + +int main() +{ + char A[3] = {'a', 'b', 'c'}; + char *p = A; + char v1, v2; + + memcpy(&v1, p, 1); + ++p; + memcpy(&v2, p, 1); + + assert(v1 == 'a'); + assert(v2 == 'b'); + + return 0; +} diff --git a/regression/cbmc/memcpy2/test.desc b/regression/cbmc/memcpy2/test.desc new file mode 100644 index 00000000000..9efefbc7362 --- /dev/null +++ b/regression/cbmc/memcpy2/test.desc @@ -0,0 +1,8 @@ +CORE +main.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index ddf17a90144..b3ebee34bf0 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -252,7 +252,6 @@ class goto_symext void trigger_auto_object(const exprt &, statet &); void initialize_auto_object(const exprt &, statet &); void process_array_expr(exprt &); - void process_array_expr_rec(exprt &, const typet &) const; exprt make_auto_object(const typet &, statet &); virtual void dereference(exprt &, statet &, const bool write); diff --git a/src/goto-symex/symex_clean_expr.cpp b/src/goto-symex/symex_clean_expr.cpp index c44174eb03a..5e1e0cf4c91 100644 --- a/src/goto-symex/symex_clean_expr.cpp +++ b/src/goto-symex/symex_clean_expr.cpp @@ -15,77 +15,13 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include - +#include #include -void goto_symext::process_array_expr_rec( - exprt &expr, - const typet &type) const -{ - if(expr.id()==ID_if) - { - if_exprt &if_expr=to_if_expr(expr); - process_array_expr_rec(if_expr.true_case(), type); - process_array_expr_rec(if_expr.false_case(), type); - } - else if(expr.id()==ID_index) - { - // strip index - index_exprt &index_expr=to_index_expr(expr); - exprt tmp=index_expr.array(); - expr.swap(tmp); - } - else if(expr.id()==ID_typecast) - { - // strip - exprt tmp=to_typecast_expr(expr).op0(); - expr.swap(tmp); - process_array_expr_rec(expr, type); - } - else if(expr.id()==ID_address_of) - { - // strip - exprt tmp=to_address_of_expr(expr).op0(); - expr.swap(tmp); - process_array_expr_rec(expr, type); - } - else if(expr.id()==ID_byte_extract_little_endian || - expr.id()==ID_byte_extract_big_endian) - { - // pick the root object - exprt tmp=to_byte_extract_expr(expr).op(); - expr.swap(tmp); - process_array_expr_rec(expr, type); - } - else if(expr.id()==ID_symbol && - expr.get_bool(ID_C_SSA_symbol) && - to_ssa_expr(expr).get_original_expr().id()==ID_index) - { - const ssa_exprt &ssa=to_ssa_expr(expr); - const index_exprt &index_expr=to_index_expr(ssa.get_original_expr()); - exprt tmp=index_expr.array(); - expr.swap(tmp); - } - else - { - Forall_operands(it, expr) - { - typet t=it->type(); - process_array_expr_rec(*it, t); - } - } - - if(!base_type_eq(expr.type(), type, ns)) - { - byte_extract_exprt be(byte_extract_id()); - be.type()=type; - be.op()=expr; - be.offset()=from_integer(0, index_type()); - - expr.swap(be); - } -} - +/// Given an expression, find the root object and the offset into it. +/// +/// The extra complication to be considered here is that the expression may +/// have any number of ternary expressions mixed with type casts. void goto_symext::process_array_expr(exprt &expr) { // This may change the type of the expression! @@ -94,26 +30,21 @@ void goto_symext::process_array_expr(exprt &expr) { if_exprt &if_expr=to_if_expr(expr); process_array_expr(if_expr.true_case()); + process_array_expr(if_expr.false_case()); - process_array_expr_rec(if_expr.false_case(), - if_expr.true_case().type()); + if(!base_type_eq(if_expr.true_case(), if_expr.false_case(), ns)) + { + byte_extract_exprt be( + byte_extract_id(), + if_expr.false_case(), + from_integer(0, index_type()), + if_expr.true_case().type()); + + if_expr.false_case().swap(be); + } if_expr.type()=if_expr.true_case().type(); } - else if(expr.id()==ID_index) - { - // strip index - index_exprt &index_expr=to_index_expr(expr); - exprt tmp=index_expr.array(); - expr.swap(tmp); - } - else if(expr.id()==ID_typecast) - { - // strip - exprt tmp=to_typecast_expr(expr).op0(); - expr.swap(tmp); - process_array_expr(expr); - } else if(expr.id()==ID_address_of) { // strip @@ -121,14 +52,6 @@ void goto_symext::process_array_expr(exprt &expr) expr.swap(tmp); process_array_expr(expr); } - else if(expr.id()==ID_byte_extract_little_endian || - expr.id()==ID_byte_extract_big_endian) - { - // pick the root object - exprt tmp=to_byte_extract_expr(expr).op(); - expr.swap(tmp); - process_array_expr(expr); - } else if(expr.id()==ID_symbol && expr.get_bool(ID_C_SSA_symbol) && to_ssa_expr(expr).get_original_expr().id()==ID_index) @@ -137,10 +60,58 @@ void goto_symext::process_array_expr(exprt &expr) const index_exprt &index_expr=to_index_expr(ssa.get_original_expr()); exprt tmp=index_expr.array(); expr.swap(tmp); + + process_array_expr(expr); + } + else if(expr.id() != ID_symbol) + { + object_descriptor_exprt ode; + ode.build(expr, ns); + do_simplify(ode.offset()); + + expr = ode.root_object(); + + if(!ode.offset().is_zero()) + { + if(expr.type().id() != ID_array) + { + exprt array_size = size_of_expr(expr.type(), ns); + do_simplify(array_size); + expr = + byte_extract_exprt( + byte_extract_id(), + expr, + from_integer(0, index_type()), + array_typet(char_type(), array_size)); + } + + // given an array type T[N], i.e., an array of N elements of type T, and a + // byte offset B, compute the array offset B/sizeof(T) and build a new + // type T[N-(B/sizeof(T))] + const array_typet &prev_array_type = to_array_type(expr.type()); + const typet &array_size_type = prev_array_type.size().type(); + const typet &subtype = prev_array_type.subtype(); + + exprt new_offset(ode.offset()); + if(new_offset.type() != array_size_type) + new_offset.make_typecast(array_size_type); + exprt subtype_size = size_of_expr(subtype, ns); + if(subtype_size.type() != array_size_type) + subtype_size.make_typecast(array_size_type); + new_offset = div_exprt(new_offset, subtype_size); + minus_exprt new_size(prev_array_type.size(), new_offset); + do_simplify(new_size); + + array_typet new_array_type(subtype, new_size); + + expr = + byte_extract_exprt( + byte_extract_id(), + expr, + ode.offset(), + new_array_type); + } } - else - Forall_operands(it, expr) - process_array_expr(*it); } /// Rewrite index/member expressions in byte_extract to offset diff --git a/src/goto-symex/symex_other.cpp b/src/goto-symex/symex_other.cpp index 74b6a62e35a..033962cd7fb 100644 --- a/src/goto-symex/symex_other.cpp +++ b/src/goto-symex/symex_other.cpp @@ -11,8 +11,6 @@ Author: Daniel Kroening, kroening@kroening.com #include "goto_symex.h" -#include - #include #include #include @@ -20,7 +18,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include - +#include #include void goto_symext::havoc_rec( @@ -136,179 +134,128 @@ void goto_symext::symex_other( else if(statement==ID_array_copy || statement==ID_array_replace) { - assert(code.operands().size()==2); - - codet clean_code(code); + // array_copy and array_replace take two pointers (to arrays); we need to: + // 1. dereference the pointers (via clean_expr) + // 2. find the actual array objects/candidates for objects (via + // process_array_expr) + // 3. build an assignment where the type on lhs and rhs is: + // - array_copy: the type of the first array (even if the second is smaller) + // - array_replace: the type of the second array (even if it is smaller) + DATA_INVARIANT( + code.operands().size() == 2, + "array_copy/array_replace takes two operands"); // we need to add dereferencing for both operands - dereference_exprt d0, d1; - d0.op0()=code.op0(); - d0.type()=empty_typet(); - d1.op0()=code.op1(); - d1.type()=empty_typet(); - - clean_code.op0()=d0; - clean_code.op1()=d1; - - clean_expr(clean_code.op0(), state, true); - exprt op0_offset=from_integer(0, index_type()); - if(clean_code.op0().id()==byte_extract_id() && - clean_code.op0().type().id()==ID_empty) - { - op0_offset=to_byte_extract_expr(clean_code.op0()).offset(); - clean_code.op0()=clean_code.op0().op0(); - } - clean_expr(clean_code.op1(), state, false); - exprt op1_offset=from_integer(0, index_type()); - if(clean_code.op1().id()==byte_extract_id() && - clean_code.op1().type().id()==ID_empty) - { - op1_offset=to_byte_extract_expr(clean_code.op1()).offset(); - clean_code.op1()=clean_code.op1().op0(); - } + dereference_exprt dest_array(code.op0()); + clean_expr(dest_array, state, true); + dereference_exprt src_array(code.op1()); + clean_expr(src_array, state, false); - process_array_expr(clean_code.op0()); - clean_expr(clean_code.op0(), state, true); - process_array_expr(clean_code.op1()); - clean_expr(clean_code.op1(), state, false); + // obtain the actual arrays + process_array_expr(dest_array); + process_array_expr(src_array); - - if(!base_type_eq(clean_code.op0().type(), - clean_code.op1().type(), ns) || - !op0_offset.is_zero() || !op1_offset.is_zero()) + // check for size (or type) mismatch and adjust + if(!base_type_eq(dest_array.type(), src_array.type(), ns)) { byte_extract_exprt be(byte_extract_id()); if(statement==ID_array_copy) { - be.op()=clean_code.op1(); - be.offset()=op1_offset; - be.type()=clean_code.op0().type(); - clean_code.op1()=be; - - if(!op0_offset.is_zero()) - { - byte_extract_exprt op0( - byte_extract_id(), - clean_code.op0(), - op0_offset, - clean_code.op0().type()); - clean_code.op0()=op0; - } + be.op()=src_array; + be.offset()=from_integer(0, index_type()); + be.type()=dest_array.type(); + src_array.swap(be); + do_simplify(src_array); } else { // ID_array_replace - be.op()=clean_code.op0(); - be.offset()=op0_offset; - be.type()=clean_code.op1().type(); - clean_code.op0()=be; - - if(!op1_offset.is_zero()) - { - byte_extract_exprt op1( - byte_extract_id(), - clean_code.op1(), - op1_offset, - clean_code.op1().type()); - clean_code.op1()=op1; - } + be.op()=dest_array; + be.offset()=from_integer(0, index_type()); + be.type()=src_array.type(); + dest_array.swap(be); + do_simplify(dest_array); } } - code_assignt assignment; - assignment.lhs()=clean_code.op0(); - assignment.rhs()=clean_code.op1(); + code_assignt assignment(dest_array, src_array); symex_assign(state, assignment); } else if(statement==ID_array_set) { - assert(code.operands().size()==2); - - codet clean_code(code); + // array_set takes a pointer (to an array) and a value that each element + // should be set to; we need to: + // 1. dereference the pointer (via clean_expr) + // 2. find the actual array object/candidates for objects (via + // process_array_expr) + // 3. use the type of the resulting array to construct an array_of + // expression + DATA_INVARIANT(code.operands().size() == 2, "array_set takes two operands"); // we need to add dereferencing for the first operand - dereference_exprt d0; - d0.op0()=code.op0(); - d0.type()=empty_typet(); - - clean_code.op0()=d0; + exprt array_expr = dereference_exprt(code.op0()); + clean_expr(array_expr, state, true); - clean_expr(clean_code.op0(), state, true); - if(clean_code.op0().id()==byte_extract_id() && - clean_code.op0().type().id()==ID_empty) - clean_code.op0()=clean_code.op0().op0(); - clean_expr(clean_code.op1(), state, false); + // obtain the actual array(s) + process_array_expr(array_expr); - process_array_expr(clean_code.op0()); - clean_expr(clean_code.op0(), state, true); + // prepare to build the array_of + exprt value = code.op1(); + clean_expr(value, state, false); - const typet &op0_type=ns.follow(clean_code.op0().type()); - - if(op0_type.id()!=ID_array) - throw "array_set expects array operand"; + // we might have a memset-style update of a non-array type - convert to a + // byte array + if(array_expr.type().id() != ID_array) + { + exprt array_size = size_of_expr(array_expr.type(), ns); + do_simplify(array_size); + array_expr = + byte_extract_exprt( + byte_extract_id(), + array_expr, + from_integer(0, index_type()), + array_typet(char_type(), array_size)); + } - const array_typet &array_type= - to_array_type(op0_type); + const array_typet &array_type = to_array_type(array_expr.type()); - if(!base_type_eq(array_type.subtype(), - clean_code.op1().type(), ns)) - clean_code.op1().make_typecast(array_type.subtype()); + if(!base_type_eq(array_type.subtype(), value.type(), ns)) + value.make_typecast(array_type.subtype()); - code_assignt assignment; - assignment.lhs()=clean_code.op0(); - assignment.rhs()=array_of_exprt(clean_code.op1(), array_type); + code_assignt assignment(array_expr, array_of_exprt(value, array_type)); symex_assign(state, assignment); } else if(statement==ID_array_equal) { + // array_equal takes two pointers (to arrays) and the symbol that the result + // should get assigned to; we need to: + // 1. dereference the pointers (via clean_expr) + // 2. find the actual array objects/candidates for objects (via + // process_array_expr) + // 3. build an assignment where the lhs is the previous third argument, and + // the right-hand side is an equality over the arrays, if their types match; + // if the types don't match the result trivially is false DATA_INVARIANT( code.operands().size() == 3, "array_equal expected to take three arguments"); - codet clean_code(code); + // we need to add dereferencing for the first two + dereference_exprt array1(code.op0()); + clean_expr(array1, state, false); + dereference_exprt array2(code.op1()); + clean_expr(array2, state, false); - // we need to add dereferencing for the first two operands - dereference_exprt d0, d1; - d0.op0()=code.op0(); - d0.type()=empty_typet(); - d1.op0()=code.op1(); - d1.type()=empty_typet(); + // obtain the actual arrays + process_array_expr(array1); + process_array_expr(array2); - clean_code.op0()=d0; - clean_code.op1()=d1; + code_assignt assignment(code.op2(), equal_exprt(array1, array2)); - clean_expr(clean_code.op0(), state, false); - exprt op0_offset=from_integer(0, index_type()); - if(clean_code.op0().id()==byte_extract_id() && - clean_code.op0().type().id()==ID_empty) - { - op0_offset=to_byte_extract_expr(clean_code.op0()).offset(); - clean_code.op0()=clean_code.op0().op0(); - } - clean_expr(clean_code.op1(), state, false); - exprt op1_offset=from_integer(0, index_type()); - if(clean_code.op1().id()==byte_extract_id() && - clean_code.op1().type().id()==ID_empty) - { - op1_offset=to_byte_extract_expr(clean_code.op1()).offset(); - clean_code.op1()=clean_code.op1().op0(); - } - - process_array_expr(clean_code.op0()); - clean_expr(clean_code.op0(), state, false); - process_array_expr(clean_code.op1()); - clean_expr(clean_code.op1(), state, false); - - if(!base_type_eq(clean_code.op0().type(), - clean_code.op1().type(), ns)) - { - clean_code.op2() = false_exprt(); - } + // check for size (or type) mismatch + if(!base_type_eq(array1.type(), array2.type(), ns)) + assignment.lhs() = false_exprt(); - code_assignt assignment( - clean_code.op2(), - equal_exprt(clean_code.op0(), clean_code.op1())); symex_assign(state, assignment); } else if(statement==ID_user_specified_predicate || From 9df769a466c7ad77d1e3721e33cf45369a40d554 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Sat, 17 Feb 2018 00:35:46 +0000 Subject: [PATCH 3/3] Byte offset to array index translation must use array type When dereferencing a void* pointer, the dereferenced type would always be considered compatible with the underlying object type. Nevertheless they are not actually the same. If the object was an array and the byte offset non-zero, the use of void* would yield the wrong index. The correct way to compute the index is using the actual (array) object type. Fixes: #1857 --- regression/cbmc/memcpy3/main.c | 18 ++++++++++++++++++ regression/cbmc/memcpy3/test.desc | 8 ++++++++ src/pointer-analysis/value_set_dereference.cpp | 9 ++++----- 3 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 regression/cbmc/memcpy3/main.c create mode 100644 regression/cbmc/memcpy3/test.desc diff --git a/regression/cbmc/memcpy3/main.c b/regression/cbmc/memcpy3/main.c new file mode 100644 index 00000000000..17b4e690d5a --- /dev/null +++ b/regression/cbmc/memcpy3/main.c @@ -0,0 +1,18 @@ +#include +#include + +int main() +{ + int A[3] = {1, 2, 3}; + int *p = A; + int v1, v2; + + memcpy(&v1, p, sizeof(int)); + ++p; + memcpy(&v2, p, sizeof(int)); + + assert(v1 == 1); + assert(v2 == 2); + + return 0; +} diff --git a/regression/cbmc/memcpy3/test.desc b/regression/cbmc/memcpy3/test.desc new file mode 100644 index 00000000000..9efefbc7362 --- /dev/null +++ b/regression/cbmc/memcpy3/test.desc @@ -0,0 +1,8 @@ +CORE +main.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/src/pointer-analysis/value_set_dereference.cpp b/src/pointer-analysis/value_set_dereference.cpp index 0e4c966817e..9492ea3a086 100644 --- a/src/pointer-analysis/value_set_dereference.cpp +++ b/src/pointer-analysis/value_set_dereference.cpp @@ -508,10 +508,8 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( exprt adjusted_offset; // are we doing a byte? - mp_integer element_size= - dereference_type.id()==ID_empty? - pointer_offset_size(char_type(), ns): - pointer_offset_size(dereference_type, ns); + mp_integer element_size = + pointer_offset_size(root_object_type.subtype(), ns); if(element_size==1) { @@ -520,7 +518,8 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( } else if(element_size<=0) { - throw "unknown or invalid type size of:\n"+dereference_type.pretty(); + throw "unknown or invalid type size of:\n" + + root_object_type.subtype().pretty(); } else {