From 80785691750d5ffbc2b561e0bbbefafb76dd06fd Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 12 Jun 2018 12:20:54 +0100 Subject: [PATCH 1/2] Add local-safe-pointers analysis This approaches the safe-pointers problem from the opposite direction compared to local- bitvector-analysis -- local-bitvector tries to establish a pointer is safe because it definitely points to known object, whereas local-safe-pointers tries to establish a particular usage is certainly safe because it is trivially dominated by a not-null test, which at present means a conditional GOTO or an ASSUME. --- .../goto-instrument/safe-dereferences/main.c | 71 +++++ .../safe-dereferences/test.desc | 14 + src/analyses/Makefile | 1 + src/analyses/local_safe_pointers.cpp | 253 ++++++++++++++++++ src/analyses/local_safe_pointers.h | 48 ++++ .../goto_instrument_parse_options.cpp | 32 +++ .../goto_instrument_parse_options.h | 3 +- 7 files changed, 421 insertions(+), 1 deletion(-) create mode 100644 regression/goto-instrument/safe-dereferences/main.c create mode 100644 regression/goto-instrument/safe-dereferences/test.desc create mode 100644 src/analyses/local_safe_pointers.cpp create mode 100644 src/analyses/local_safe_pointers.h diff --git a/regression/goto-instrument/safe-dereferences/main.c b/regression/goto-instrument/safe-dereferences/main.c new file mode 100644 index 00000000000..f691ee75569 --- /dev/null +++ b/regression/goto-instrument/safe-dereferences/main.c @@ -0,0 +1,71 @@ +static void noop() { } + +int main(int argc, char **argv) { + + int x; + + // Should work (guarded by assume): + int *ptr1 = &x; + __CPROVER_assume(ptr1 != 0); + int deref1 = *ptr1; + + // Should work (guarded by else): + int *ptr2 = &x; + if(ptr2 == 0) + { + } + else + { + int deref2 = *ptr2; + } + + // Should work (guarded by if): + int *ptr3 = &x; + if(ptr3 != 0) + { + int deref3 = *ptr3; + } + + // Shouldn't work yet despite being safe (guarded by backward goto): + int *ptr4 = &x; + goto check; + +deref: + int deref4 = *ptr4; + goto end_test4; + +check: + __CPROVER_assume(ptr4 != 0); + goto deref; + +end_test4: + + // Shouldn't work yet despite being safe (guarded by confluence): + int *ptr5 = &x; + if(argc == 5) + __CPROVER_assume(ptr5 != 0); + else + __CPROVER_assume(ptr5 != 0); + int deref5 = *ptr5; + + // Shouldn't work (unsafe as only guarded on one branch): + int *ptr6 = &x; + if(argc == 6) + __CPROVER_assume(ptr6 != 0); + else + { + } + int deref6 = *ptr6; + + // Shouldn't work due to suspicion *ptr6 might alter ptr7: + int *ptr7 = &x; + __CPROVER_assume(ptr7 != 0); + ptr6 = 0; + int deref7 = *ptr7; + + // Shouldn't work due to suspicion noop() call might alter ptr8: + int *ptr8 = &x; + __CPROVER_assume(ptr8 != 0); + noop(); + int deref8 = *ptr8; +} diff --git a/regression/goto-instrument/safe-dereferences/test.desc b/regression/goto-instrument/safe-dereferences/test.desc new file mode 100644 index 00000000000..ce3853b3ff8 --- /dev/null +++ b/regression/goto-instrument/safe-dereferences/test.desc @@ -0,0 +1,14 @@ +CORE +main.c +--show-safe-dereferences +^EXIT=0$ +^SIGNAL=0$ +Safe \(known-not-null\) dereferences: \{main::[0-9]+::ptr1\} +Safe \(known-not-null\) dereferences: \{main::[0-9]+::ptr2\} +Safe \(known-not-null\) dereferences: \{main::[0-9]+::ptr3\} +-- +Safe \(known-not-null\) dereferences: \{main::[0-9]+::ptr[4-8]\} +^warning: ignoring +-- +See comments in main.c for details about what each ptr variable is testing, and why they +should or shouldn't be seen as safe accesses. diff --git a/src/analyses/Makefile b/src/analyses/Makefile index 61d3bb18e16..350a526f70c 100644 --- a/src/analyses/Makefile +++ b/src/analyses/Makefile @@ -20,6 +20,7 @@ SRC = ai.cpp \ local_bitvector_analysis.cpp \ local_cfg.cpp \ local_may_alias.cpp \ + local_safe_pointers.cpp \ locals.cpp \ natural_loops.cpp \ reaching_definitions.cpp \ diff --git a/src/analyses/local_safe_pointers.cpp b/src/analyses/local_safe_pointers.cpp new file mode 100644 index 00000000000..cd632bd8c37 --- /dev/null +++ b/src/analyses/local_safe_pointers.cpp @@ -0,0 +1,253 @@ +/*******************************************************************\ + +Module: Local safe pointer analysis + +Author: Diffblue Ltd + +\*******************************************************************/ + +/// \file +/// Local safe pointer analysis + +#include "local_safe_pointers.h" + +#include +#include + +/// If `expr` is of the form `x != nullptr`, return x. Otherwise return null +static const exprt *get_null_checked_expr(const exprt &expr) +{ + if(expr.id() == ID_notequal) + { + const exprt *op0 = &expr.op0(), *op1 = &expr.op1(); + if(op0->type().id() == ID_pointer && + *op0 == null_pointer_exprt(to_pointer_type(op0->type()))) + { + std::swap(op0, op1); + } + + if(op1->type().id() == ID_pointer && + *op1 == null_pointer_exprt(to_pointer_type(op1->type()))) + { + while(op0->id() == ID_typecast) + op0 = &op0->op0(); + return op0; + } + } + + return nullptr; +} + +/// Return structure for `get_conditional_checked_expr` +struct goto_null_checkt +{ + /// If true, the given GOTO tests that a pointer expression is non-null on the + /// taken branch; otherwise, on the not-taken branch. + bool checked_when_taken; + + /// Null-tested pointer expression + exprt checked_expr; +}; + +/// Check if a GOTO guard expression tests if a pointer is null +/// \param goto_guard: expression to check +/// \return a `goto_null_checkt` indicating what expression is tested and +/// whether the check applies on the taken or not-taken branch, or an empty +/// optionalt if this isn't a null check. +static optionalt +get_conditional_checked_expr(const exprt &goto_guard) +{ + exprt normalized_guard = goto_guard; + bool checked_when_taken = true; + while(normalized_guard.id() == ID_not || normalized_guard.id() == ID_equal) + { + if(normalized_guard.id() == ID_not) + normalized_guard = normalized_guard.op0(); + else + normalized_guard.id(ID_notequal); + checked_when_taken = !checked_when_taken; + } + + const exprt *checked_expr = get_null_checked_expr(normalized_guard); + if(!checked_expr) + return {}; + else + return goto_null_checkt { checked_when_taken, *checked_expr }; +} + +/// Compute safe dereference expressions for a given GOTO program. This +/// populates `non_null_expressions` mapping instruction location numbers +/// onto a set of expressions that are known to be non-null BEFORE that +/// instruction is executed. +/// \param goto_program: program to analyse +void local_safe_pointerst::operator()(const goto_programt &goto_program) +{ + std::set checked_expressions; + + for(const auto &instruction : goto_program.instructions) + { + // Handle control-flow convergence pessimistically: + if(instruction.incoming_edges.size() > 1) + checked_expressions.clear(); + // Retrieve working set for forward GOTOs: + else if(instruction.is_target()) + checked_expressions = non_null_expressions[instruction.location_number]; + + // Save the working set at this program point: + if(!checked_expressions.empty()) + non_null_expressions[instruction.location_number] = checked_expressions; + + switch(instruction.type) + { + // Instruction types that definitely don't write anything, and therefore + // can't store a null pointer: + case DECL: + case DEAD: + case ASSERT: + case SKIP: + case LOCATION: + case RETURN: + case THROW: + case CATCH: + break; + + // Possible checks: + case ASSUME: + { + const exprt *checked_expr; + if((checked_expr = get_null_checked_expr(instruction.guard)) != nullptr) + { + checked_expressions.insert(*checked_expr); + } + break; + } + + case GOTO: + if(!instruction.is_backwards_goto()) + { + if(auto conditional_check = + get_conditional_checked_expr(instruction.guard)) + { + auto &taken_checked_expressions = + non_null_expressions[instruction.get_target()->location_number]; + taken_checked_expressions = checked_expressions; + + if(conditional_check->checked_when_taken) + taken_checked_expressions.insert(conditional_check->checked_expr); + else + checked_expressions.insert(conditional_check->checked_expr); + + break; + } + // Otherwise fall through to... + } + + default: + // Pessimistically assume all other instructions might overwrite any + // pointer with a possibly-null value. + checked_expressions.clear(); + break; + } + } +} + +/// Output non-null expressions per instruction in human-readable format +/// \param out: stream to write output to +/// \param goto_program: GOTO program analysed (the same one passed to +/// operator()) +/// \param ns: global namespace +void local_safe_pointerst::output( + std::ostream &out, const goto_programt &goto_program, const namespacet &ns) +{ + forall_goto_program_instructions(i_it, goto_program) + { + out << "**** " << i_it->location_number << " " + << i_it->source_location << "\n"; + + out << "Non-null expressions: "; + + auto findit = non_null_expressions.find(i_it->location_number); + if(findit == non_null_expressions.end()) + out << "{}"; + else + { + out << "{"; + bool first = true; + for(const exprt &expr : findit->second) + { + if(!first) + out << ", "; + first = true; + format_rec(out, expr); + } + out << "}"; + } + + out << '\n'; + goto_program.output_instruction(ns, "", out, *i_it); + out << '\n'; + } +} + +/// Output safely dereferenced expressions per instruction in human-readable +/// format. For example, if we have an instruction `ASSUME x->y->z == a->b` +/// and we know expressions `x->y`, `a` and `other` are not null prior to it, +/// this will print `{x->y, a}`, the intersection of the `dereference_exprt`s +/// used here and the known-not-null expressions. +/// \param out: stream to write output to +/// \param goto_program: GOTO program analysed (the same one passed to +/// operator()) +/// \param ns: global namespace +void local_safe_pointerst::output_safe_dereferences( + std::ostream &out, const goto_programt &goto_program, const namespacet &ns) +{ + forall_goto_program_instructions(i_it, goto_program) + { + out << "**** " << i_it->location_number << " " + << i_it->source_location << "\n"; + + out << "Safe (known-not-null) dereferences: "; + + auto findit = non_null_expressions.find(i_it->location_number); + if(findit == non_null_expressions.end()) + out << "{}"; + else + { + out << "{"; + bool first = true; + for(auto subexpr_it = i_it->code.depth_begin(), + subexpr_end = i_it->code.depth_end(); + subexpr_it != subexpr_end; + ++subexpr_it) + { + if(subexpr_it->id() == ID_dereference) + { + if(!first) + out << ", "; + first = true; + format_rec(out, subexpr_it->op0()); + } + } + out << "}"; + } + + out << '\n'; + goto_program.output_instruction(ns, "", out, *i_it); + out << '\n'; + } +} + +/// Return true if the local-safe-pointers analysis has determined `expr` cannot +/// be null as of `program_point` (i.e. that `expr` is non-null when the +/// `program_point` instruction *starts* executing) +bool local_safe_pointerst::is_non_null_at_program_point( + const exprt &expr, goto_programt::const_targett program_point) +{ + auto findit = non_null_expressions.find(program_point->location_number); + if(findit == non_null_expressions.end()) + return false; + const exprt *tocheck = &expr; + while(tocheck->id() == ID_typecast) + tocheck = &tocheck->op0(); + return findit->second.count(*tocheck) != 0; +} diff --git a/src/analyses/local_safe_pointers.h b/src/analyses/local_safe_pointers.h new file mode 100644 index 00000000000..4d0a43b3043 --- /dev/null +++ b/src/analyses/local_safe_pointers.h @@ -0,0 +1,48 @@ +/*******************************************************************\ + +Module: Local safe pointer analysis + +Author: Diffblue Ltd + +\*******************************************************************/ + +/// \file +/// Local safe pointer analysis + +#ifndef CPROVER_ANALYSES_LOCAL_SAFE_POINTERS_H +#define CPROVER_ANALYSES_LOCAL_SAFE_POINTERS_H + +#include +#include + +/// A very simple, cheap analysis to determine when dereference operations are +/// trivially guarded by a check against a null pointer access. +/// In the interests of a very cheap analysis we only search for very local +/// guards -- at the moment only `if(x != null) { *x }` +/// and `assume(x != null); *x` are handled. Control-flow convergence and +/// possibly-aliasing operations are handled pessimistically. +class local_safe_pointerst +{ + std::map> non_null_expressions; + + public: + void operator()(const goto_programt &goto_program); + + bool is_non_null_at_program_point( + const exprt &expr, goto_programt::const_targett program_point); + + bool is_safe_dereference( + const dereference_exprt &deref, + goto_programt::const_targett program_point) + { + return is_non_null_at_program_point(deref.op0(), program_point); + } + + void output( + std::ostream &stream, const goto_programt &program, const namespacet &ns); + + void output_safe_dereferences( + std::ostream &stream, const goto_programt &program, const namespacet &ns); +}; + +#endif // CPROVER_ANALYSES_LOCAL_SAFE_POINTERS_H diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index 134ef0ab953..66ee0398ca3 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -61,6 +61,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include @@ -286,6 +287,34 @@ int goto_instrument_parse_optionst::doit() return CPROVER_EXIT_SUCCESS; } + if(cmdline.isset("show-local-safe-pointers") || + cmdline.isset("show-safe-dereferences")) + { + // Ensure location numbering is unique: + goto_model.goto_functions.update(); + + namespacet ns(goto_model.symbol_table); + + forall_goto_functions(it, goto_model.goto_functions) + { + local_safe_pointerst local_safe_pointers; + local_safe_pointers(it->second.body); + std::cout << ">>>>\n"; + std::cout << ">>>> " << it->first << '\n'; + std::cout << ">>>>\n"; + if(cmdline.isset("show-local-safe-pointers")) + local_safe_pointers.output(std::cout, it->second.body, ns); + else + { + local_safe_pointers.output_safe_dereferences( + std::cout, it->second.body, ns); + } + std::cout << '\n'; + } + + return CPROVER_EXIT_SUCCESS; + } + if(cmdline.isset("show-custom-bitvector-analysis")) { do_indirect_call_and_rtti_removal(); @@ -1480,6 +1509,9 @@ void goto_instrument_parse_optionst::help() HELP_SHOW_CLASS_HIERARCHY // NOLINTNEXTLINE(whitespace/line_length) " --show-threaded show instructions that may be executed by more than one thread\n" + " --show-local-safe-pointers show pointer expressions that are trivially dominated by a not-null check\n" // NOLINT(*) + " --show-safe-dereferences show pointer expressions that are trivially dominated by a not-null check\n" // NOLINT(*) + " *and* used as a dereference operand\n" // NOLINT(*) "\n" "Safety checks:\n" " --no-assertions ignore user assertions\n" diff --git a/src/goto-instrument/goto_instrument_parse_options.h b/src/goto-instrument/goto_instrument_parse_options.h index 779574b364c..dd6950932e4 100644 --- a/src/goto-instrument/goto_instrument_parse_options.h +++ b/src/goto-instrument/goto_instrument_parse_options.h @@ -94,7 +94,8 @@ Author: Daniel Kroening, kroening@kroening.com "(splice-call):" \ OPT_REMOVE_CALLS_NO_BODY \ OPT_REPLACE_FUNCTION_BODY \ - OPT_GOTO_PROGRAM_STATS + OPT_GOTO_PROGRAM_STATS \ + "(show-local-safe-pointers)(show-safe-dereferences)" // clang-format on From 9fd3434c96b68042824d99bab5d46a7cc82a6381 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 12 Jun 2018 12:21:20 +0100 Subject: [PATCH 2/2] Use local-safe-pointers analysis to improve Symex pointer resolution This uses local_safe_pointerst to determine when symex dereferences a pointer that cannot be null. When it does the null result is excluded from the possible values, and therefore a $invalid_object reference may be excluded from the result of dereferencing such a pointer. This can improve constant propagation. --- .../test.class | Bin 0 -> 776 bytes .../test.desc | 14 ++++ .../test.java | 25 ++++++ .../test_vccs.desc | 11 +++ .../symex_should_exclude_null_pointers/main.c | 74 ++++++++++++++++++ .../test.desc | 17 ++++ src/goto-symex/goto_symex.h | 4 + src/goto-symex/symex_dereference.cpp | 23 +++++- src/goto-symex/symex_function_call.cpp | 5 ++ src/goto-symex/symex_main.cpp | 11 ++- .../goto_program_dereference.h | 4 +- .../value_set_dereference.cpp | 18 +++-- src/pointer-analysis/value_set_dereference.h | 36 ++++++++- 13 files changed, 230 insertions(+), 12 deletions(-) create mode 100644 jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.class create mode 100644 jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.desc create mode 100644 jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.java create mode 100644 jbmc/regression/jbmc/symex_should_exclude_null_pointers/test_vccs.desc create mode 100644 regression/cbmc/symex_should_exclude_null_pointers/main.c create mode 100644 regression/cbmc/symex_should_exclude_null_pointers/test.desc diff --git a/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.class b/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.class new file mode 100644 index 0000000000000000000000000000000000000000..cc8bc913aa0492d55c8052933aabfec21fe7d9da GIT binary patch literal 776 zcmYLH-%ry}7(I9G)~zk<7tFZ{It3jA9S_FDNC*ZcAsP69Az?4%ZmU}8TGMtI{}g#K zKKbm=C6eg7f0G)|9aWn2e*M1heCM2dZ-4&z_8q`I+%}OzBL@xF4cssp7gVxn;HHTT z8YXU`X<$hp{mgZ|fq>W+C@;t;a>CdR{OFMz$)4w^PFEna=K5~DE}%EJm-ZPpgMlNE z@3_9RI~w+!@To!zlsZ9QdiyeTRh|^}_=Ovhq!T+){D7J#(i;)152foX&aUD#M*guM zykbGb%!cxH&pG6mHF0>D%uMaYvVXiIUnbnywZ4~l)0&FL+zUow-`R4Not#pf#St`}Iw#ly?w9dIvo$SWKA}$jl zh^P}cLw_~S*Ey@<3h`xVzfd-yZSeh#8drfV<}jbwz00VcVoCdo>?sVLKDAFsUz|i{ U0ud9wG*b2k{~KK8ObXZj0_|;r`v3p{ literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.desc b/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.desc new file mode 100644 index 00000000000..3b6bab45327 --- /dev/null +++ b/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.desc @@ -0,0 +1,14 @@ +CORE +test.class +--function test.main +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +Null pointer check: FAILURE$ +assertion at file test\.java line 21 .*: SUCCESS$ +-- +^warning: ignoring +-- +JBMC should reports failure, as it might dereference a null pointer, but as Java +is a safe language we should statically determine that if we reach the assertion +it cannot be violated. diff --git a/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.java b/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.java new file mode 100644 index 00000000000..bbca0a44b99 --- /dev/null +++ b/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test.java @@ -0,0 +1,25 @@ +public class test { + + public int field; + + public test(int value) { + + field = value; + + } + + public static void main(boolean unknown) { + + test t = new test(12345); + test maybe_t = unknown ? null : t; + + // t could still be null, but symex ought to notice that as the Java frontend introduces + // checks before all pointer dereference operations, it can statically eliminate + // the assertion below. + + int field_value = maybe_t.field; + assert field_value == 12345; + + } + +} diff --git a/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test_vccs.desc b/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test_vccs.desc new file mode 100644 index 00000000000..b99798f02dc --- /dev/null +++ b/jbmc/regression/jbmc/symex_should_exclude_null_pointers/test_vccs.desc @@ -0,0 +1,11 @@ +CORE +test.class +--show-vcc --function test.main +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +assertion at file test\.java line 22 +-- +Because symex should statically determine the assertion can't be violated there should not be a goal for it +by the time --show-vccs prints the equation. diff --git a/regression/cbmc/symex_should_exclude_null_pointers/main.c b/regression/cbmc/symex_should_exclude_null_pointers/main.c new file mode 100644 index 00000000000..4ee8d91e3bb --- /dev/null +++ b/regression/cbmc/symex_should_exclude_null_pointers/main.c @@ -0,0 +1,74 @@ +static void noop() { } + +int main(int argc, char **argv) { + + int x; + int *maybe_null = argc & 1 ? &x : 0; + + // Should work (guarded by assume): + int *ptr1 = maybe_null; + __CPROVER_assume(ptr1 != 0); + int deref1 = *ptr1; + + // Should work (guarded by else): + int *ptr2 = maybe_null; + if(ptr2 == 0) + { + } + else + { + int deref2 = *ptr2; + } + + // Should work (guarded by if): + int *ptr3 = maybe_null; + if(ptr3 != 0) + { + int deref3 = *ptr3; + } + + // Shouldn't work yet despite being safe (guarded by backward goto): + int *ptr4 = maybe_null; + goto check; + +deref: + int deref4 = *ptr4; + goto end_test4; + +check: + __CPROVER_assume(ptr4 != 0); + goto deref; + +end_test4: + + // Shouldn't work yet despite being safe (guarded by confluence): + int *ptr5 = maybe_null; + if(argc == 5) + __CPROVER_assume(ptr5 != 0); + else + __CPROVER_assume(ptr5 != 0); + int deref5 = *ptr5; + + // Shouldn't work (unsafe as only guarded on one branch): + int *ptr6 = maybe_null; + if(argc == 6) + __CPROVER_assume(ptr6 != 0); + else + { + } + int deref6 = *ptr6; + + // Shouldn't work due to suspicion write to ptr6 might alter ptr7: + int *ptr7 = maybe_null; + __CPROVER_assume(ptr7 != 0); + ptr6 = 0; + int deref7 = *ptr7; + + // Shouldn't work due to suspicion noop() call might alter ptr8: + int *ptr8 = maybe_null; + __CPROVER_assume(ptr8 != 0); + noop(); + int deref8 = *ptr8; + + assert(0); +} diff --git a/regression/cbmc/symex_should_exclude_null_pointers/test.desc b/regression/cbmc/symex_should_exclude_null_pointers/test.desc new file mode 100644 index 00000000000..e5075fa5763 --- /dev/null +++ b/regression/cbmc/symex_should_exclude_null_pointers/test.desc @@ -0,0 +1,17 @@ +CORE +main.c +--show-vcc +^EXIT=0$ +^SIGNAL=0$ +ptr4\$object +ptr5\$object +ptr6\$object +ptr7\$object +ptr8\$object +-- +ptr[1-3]\$object +^warning: ignoring +-- +ptrX\$object appearing in the VCCs indicates symex was unsure whether the pointer had a valid +target, and uses the $object symbol as a referred object of last resort. ptr1-3 should be judged +not-null by symex, so their $object symbols do not occur. diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 0e7342c2493..c1e77a3c5f1 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -12,6 +12,8 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_GOTO_SYMEX_GOTO_SYMEX_H #define CPROVER_GOTO_SYMEX_GOTO_SYMEX_H +#include + #include #include @@ -465,6 +467,8 @@ class goto_symext void rewrite_quantifiers(exprt &, statet &); path_storaget &path_storage; + + std::unordered_map safe_pointers; }; #endif // CPROVER_GOTO_SYMEX_GOTO_SYMEX_H diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index e259a515489..26c8a5bccc4 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -236,6 +236,22 @@ void goto_symext::dereference_rec( if(expr.operands().size()!=1) throw "dereference takes one operand"; + bool expr_is_not_null = false; + + if(state.threads.size() == 1) + { + const irep_idt &expr_function = state.source.pc->function; + if(!expr_function.empty()) + { + dereference_exprt to_check = to_dereference_expr(expr); + state.get_original_name(to_check); + + expr_is_not_null = + safe_pointers.at(expr_function).is_safe_dereference( + to_check, state.source.pc); + } + } + exprt tmp1; tmp1.swap(expr.op0()); @@ -246,7 +262,12 @@ void goto_symext::dereference_rec( symex_dereference_statet symex_dereference_state(*this, state); value_set_dereferencet dereference( - ns, state.symbol_table, options, symex_dereference_state, language_mode); + ns, + state.symbol_table, + options, + symex_dereference_state, + language_mode, + expr_is_not_null); // std::cout << "**** " << format(tmp1) << '\n'; exprt tmp2= diff --git a/src/goto-symex/symex_function_call.cpp b/src/goto-symex/symex_function_call.cpp index 6de8995364d..1953e898a4c 100644 --- a/src/goto-symex/symex_function_call.cpp +++ b/src/goto-symex/symex_function_call.cpp @@ -222,6 +222,11 @@ void goto_symext::symex_function_call_code( state.dirty.populate_dirty_for_function(identifier, goto_function); + auto emplace_safe_pointers_result = + safe_pointers.emplace(identifier, local_safe_pointerst{}); + if(emplace_safe_pointers_result.second) + emplace_safe_pointers_result.first->second(goto_function.body); + const bool stop_recursing=get_unwind_recursion( identifier, state.source.thread_nr, diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index 926279b7527..63f1aa1d3b1 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -132,8 +132,15 @@ void goto_symext::initialize_entry_point( INVARIANT( !pc->function.empty(), "all symexed instructions should have a function"); - state.dirty.populate_dirty_for_function( - pc->function, get_goto_function(pc->function)); + + const goto_functiont &entry_point_function = get_goto_function(pc->function); + + auto emplace_safe_pointers_result = + safe_pointers.emplace(pc->function, local_safe_pointerst{}); + if(emplace_safe_pointers_result.second) + emplace_safe_pointers_result.first->second(entry_point_function.body); + + state.dirty.populate_dirty_for_function(pc->function, entry_point_function); symex_transition(state, state.source.pc); } diff --git a/src/pointer-analysis/goto_program_dereference.h b/src/pointer-analysis/goto_program_dereference.h index 312dcc9f069..95e2d733323 100644 --- a/src/pointer-analysis/goto_program_dereference.h +++ b/src/pointer-analysis/goto_program_dereference.h @@ -34,7 +34,9 @@ class goto_program_dereferencet:protected dereference_callbackt options(_options), ns(_ns), value_sets(_value_sets), - dereference(_ns, _new_symbol_table, _options, *this, ID_nil) { } + dereference(_ns, _new_symbol_table, _options, *this, ID_nil, false) + { + } void dereference_program( goto_programt &goto_program, diff --git a/src/pointer-analysis/value_set_dereference.cpp b/src/pointer-analysis/value_set_dereference.cpp index 17bf64ef4c8..b22ced28326 100644 --- a/src/pointer-analysis/value_set_dereference.cpp +++ b/src/pointer-analysis/value_set_dereference.cpp @@ -104,10 +104,14 @@ exprt value_set_dereferencet::dereference( #if 0 std::cout << "V: " << format(value.pointer_guard) << " --> "; - std::cout << format(value.value) << '\n'; + std::cout << format(value.value); + if(value.ignore) + std::cout << " (ignored)"; + std::cout << '\n'; #endif - values.push_back(value); + if(!value.ignore) + values.push_back(value); } // can this fail? @@ -313,7 +317,11 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( if(root_object.id() == ID_null_object) { - if(options.get_bool_option("pointer-check")) + if(exclude_null_derefs) + { + result.ignore = true; + } + else if(options.get_bool_option("pointer-check")) { guardt tmp_guard(guard); @@ -408,9 +416,9 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( // This is stuff like *((char *)5). // This is turned into an access to __CPROVER_memory[...]. - if(language_mode==ID_java) + if(language_mode == ID_java) { - result.value=nil_exprt(); + result.ignore = true; return result; } diff --git a/src/pointer-analysis/value_set_dereference.h b/src/pointer-analysis/value_set_dereference.h index a2475823b1c..ed166470245 100644 --- a/src/pointer-analysis/value_set_dereference.h +++ b/src/pointer-analysis/value_set_dereference.h @@ -35,18 +35,24 @@ class value_set_dereferencet * \param _options Options, in particular whether pointer checks are to be performed * \param _dereference_callback Callback object for error reporting + * \param _language_mode Mode for any new symbols created to represent + a dereference failure + * \param _exclude_null_derefs Ignore value-set entries that indicate a given + dereference may follow a null pointer */ value_set_dereferencet( const namespacet &_ns, symbol_tablet &_new_symbol_table, const optionst &_options, dereference_callbackt &_dereference_callback, - const irep_idt _language_mode): + const irep_idt _language_mode, + bool _exclude_null_derefs): ns(_ns), new_symbol_table(_new_symbol_table), options(_options), dereference_callback(_dereference_callback), - language_mode(_language_mode) + language_mode(_language_mode), + exclude_null_derefs(_exclude_null_derefs) { } virtual ~value_set_dereferencet() { } @@ -82,6 +88,9 @@ class value_set_dereferencet /// language_mode: ID_java, ID_C or another language identifier /// if we know the source language in use, irep_idt() otherwise. const irep_idt language_mode; + /// Flag indicating whether `value_set_dereferencet::dereference` should + /// disregard an apparent attempt to dereference NULL + const bool exclude_null_derefs; static unsigned invalid_counter; bool dereference_type_compare( @@ -92,17 +101,38 @@ class value_set_dereferencet exprt &dest, const exprt &offset) const; + /// Return value for `build_reference_to`; see that method for documentation. class valuet { public: exprt value; exprt pointer_guard; + bool ignore; - valuet():value(nil_exprt()), pointer_guard(false_exprt()) + valuet():value(nil_exprt()), pointer_guard(false_exprt()), ignore(false) { } }; + /// Get a guard and expression to access `what` under `guard`. + /// \param what: value set entry to convert to an expression: either + /// ID_unknown, ID_invalid, or an object_descriptor_exprt giving a referred + /// object and offset. + /// \param mode: whether the pointer is being read or written; used to create + /// pointer validity checks if need be + /// \param pointer: pointer expression that may point to `what` + /// \param guard: guard under which the pointer is dereferenced + /// \return + /// * If we were explicitly instructed to ignore `what` as a possible + /// pointer target: a `valuet` with `ignore` = true, and `value` and + /// `pointer_guard` set to nil. + /// * If we could build an expression corresponding to `what`: + /// A `valuet` with non-nil `value`, and `pointer_guard` set to an + /// appropriate check to determine if `pointer_expr` really points to + /// `what` (for example, we might return + /// `{.value = global, .pointer_guard = (pointer_expr == &global)}` + /// * Otherwise, if we couldn't build an expression (e.g. for `what` == + /// ID_unknown), a `valuet` with nil `value` and `ignore` == false. valuet build_reference_to( const exprt &what, const modet mode,