From f8f661c3fbe260388ee40f5f76761e28e4e966f2 Mon Sep 17 00:00:00 2001 From: Malte Mues Date: Tue, 10 Jul 2018 14:00:23 -0400 Subject: [PATCH 1/7] Add a highlevel api to test types on exprt and typet The CBMC code base normaly checks the id of an irept subtype against some magic id. But for some tests, it is necessary to check against more than one id. So this functions are use do the checks. This should further support code readability as a combination of different ids gets a name. --- src/ansi-c/c_typecast.cpp | 12 +- src/util/c_types_util.h | 108 +++++++++ src/util/std_types.h | 462 +++++++++++++++++++++----------------- 3 files changed, 365 insertions(+), 217 deletions(-) create mode 100644 src/util/c_types_util.h diff --git a/src/ansi-c/c_typecast.cpp b/src/ansi-c/c_typecast.cpp index df332a9cb1c..0546393e9b6 100644 --- a/src/ansi-c/c_typecast.cpp +++ b/src/ansi-c/c_typecast.cpp @@ -55,14 +55,14 @@ bool c_implicit_typecast_arithmetic( return !c_typecast.errors.empty(); } -bool is_void_pointer(const typet &type) +bool has_a_void_pointer(const typet &type) { if(type.id()==ID_pointer) { if(type.subtype().id()==ID_empty) return true; - return is_void_pointer(type.subtype()); + return has_a_void_pointer(type.subtype()); } else return false; @@ -216,8 +216,8 @@ bool check_c_implicit_typecast( if(src_subtype==dest_subtype) return false; - else if(is_void_pointer(src_type) || // from void to anything - is_void_pointer(dest_type)) // to void from anything + else if(has_a_void_pointer(src_type) || // from void to anything + has_a_void_pointer(dest_type)) // to void from anything return false; } @@ -518,8 +518,8 @@ void c_typecastt::implicit_typecast_followed( const typet &src_sub=ns.follow(src_type.subtype()); const typet &dest_sub=ns.follow(dest_type.subtype()); - if(is_void_pointer(src_type) || - is_void_pointer(dest_type)) + if(has_a_void_pointer(src_type) || + has_a_void_pointer(dest_type)) { // from/to void is always good } diff --git a/src/util/c_types_util.h b/src/util/c_types_util.h new file mode 100644 index 00000000000..b6ec4ffc8e1 --- /dev/null +++ b/src/util/c_types_util.h @@ -0,0 +1,108 @@ +// Copyright 2018 Author: Malte Mues + +/// \file This file contains functions, that should support test for underlying +/// c types, in cases where this is required for anlysis purpose. +#ifndef CPROVER_UTIL_C_TYPES_UTIL_H +#define CPROVER_UTIL_C_TYPES_UTIL_H + +#include "arith_tools.h" +#include "invariant.h" +#include "std_types.h" +#include "type.h" + +#include +#include + +/// This function checks, whether this has been a char type in the c program. +inline bool is_c_char(const typet &type) +{ + const irep_idt &c_type = type.get(ID_C_c_type); + return is_signed_or_unsigned_bitvector(type) && + (c_type == ID_char || c_type == ID_unsigned_char || + c_type == ID_signed_char); +} + +/// This function checks, whether the type +/// has been a bool type in the c program. +inline bool is_c_bool(const typet &type) +{ + return type.id() == ID_c_bool; +} + +/// This function checks, whether the type is has been some kind of integer +/// type in the c program. +/// It considers the signed and unsigned verison of +/// int, short, long and long long as integer types in c. +inline bool is_c_int_derivate(const typet &type) +{ + const irep_idt &c_type = type.get(ID_C_c_type); + return is_signed_or_unsigned_bitvector(type) && + (c_type == ID_signed_int || c_type == ID_unsigned_int || + c_type == ID_signed_short_int || c_type == ID_unsigned_int || + c_type == ID_unsigned_short_int || c_type == ID_signed_long_int || + c_type == ID_signed_long_long_int || c_type == ID_unsigned_long_int || + c_type == ID_unsigned_long_long_int); +} + +/// This function checks, whether type is a pointer and the target type +/// of the pointer has been a char type in the c program. +inline bool is_c_char_pointer(const typet &type) +{ + return is_pointer(type) && is_c_char(type.subtype()); +} + +/// This function checks, whether type is a pointer and the target type +/// has been some kind of int type in the c program. +/// is_c_int_derivate answers is used for checking the int type. +inline bool is_c_int_derivate_pointer(const typet &type) +{ + return is_pointer(type) && is_c_int_derivate(type.subtype()); +} + +/// This function checks, whether the type +/// has been an enum type in the c program. +inline bool is_c_enum(const typet &type) +{ + return type.id() == ID_c_enum; +} + +/// This function creates a constant representing the +/// bitvector encoded integer value of a string in the enum. +/// \param member_name is a string that should be in the enum. +/// \param c_enum the enum type memeber_name is supposed to be part of. +/// \return value a constant, that could be assigned as value for an expression +/// with type c_enum. +constant_exprt convert_memeber_name_to_enum_value( + const std::string &member_name, + const c_enum_typet &c_enum) +{ + for(const auto &enum_value : c_enum.members()) + { + if(id2string(enum_value.get_identifier()) == member_name) + { + mp_integer int_value = string2integer(id2string(enum_value.get_value())); + return from_integer(int_value, c_enum); + } + } + INVARIANT(false, "member_name must be a valid value in the c_enum."); +} + +/// This function creates a constant representing either 0 or 1 as value of +/// type type. +/// \param bool_value: A string that is compared to "true" ignoring case. +/// \param type: The type, the resulting constant is supposed to have. +/// \return a constant of type \param type with either 0 or 1 as value. +constant_exprt from_c_boolean_value(std::string bool_value, const typet &type) +{ + std::transform( + bool_value.begin(), bool_value.end(), bool_value.begin(), ::tolower); + if(bool_value == "true") + { + return from_integer(mp_integer(1), type); + } + else + { + return from_integer(mp_integer(0), type); + } +} +#endif // CPROVER_UTIL_C_TYPES_UTIL_H diff --git a/src/util/std_types.h b/src/util/std_types.h index 792f7e40ab8..b8c256d5404 100644 --- a/src/util/std_types.h +++ b/src/util/std_types.h @@ -17,100 +17,107 @@ Author: Daniel Kroening, kroening@kroening.com */ #include "expr.h" -#include "mp_arith.h" -#include "invariant.h" #include "expr_cast.h" +#include "invariant.h" +#include "mp_arith.h" #include class constant_exprt; +/// This method tests, +/// if the given typet is a constant +inline bool is_constant(const typet &type) +{ + return type.id() == ID_constant; +} + /*! * Conversion to subclasses of @ref typet */ /*! \brief The proper Booleans */ -class bool_typet:public typet +class bool_typet : public typet { public: - bool_typet():typet(ID_bool) + bool_typet() : typet(ID_bool) { } }; /*! \brief The NIL type */ -class nil_typet:public typet +class nil_typet : public typet { public: - nil_typet():typet(static_cast(get_nil_irep())) + nil_typet() : typet(static_cast(get_nil_irep())) { } }; /*! \brief The empty type */ -class empty_typet:public typet +class empty_typet : public typet { public: - empty_typet():typet(ID_empty) + empty_typet() : typet(ID_empty) { } }; /*! \brief The void type */ -class void_typet:public empty_typet +class void_typet : public empty_typet { }; /*! \brief Unbounded, signed integers */ -class integer_typet:public typet +class integer_typet : public typet { public: - integer_typet():typet(ID_integer) + integer_typet() : typet(ID_integer) { } }; /*! \brief Natural numbers (which include zero) */ -class natural_typet:public typet +class natural_typet : public typet { public: - natural_typet():typet(ID_natural) + natural_typet() : typet(ID_natural) { } }; /*! \brief Unbounded, signed rational numbers */ -class rational_typet:public typet +class rational_typet : public typet { public: - rational_typet():typet(ID_rational) + rational_typet() : typet(ID_rational) { } }; /*! \brief Unbounded, signed real numbers */ -class real_typet:public typet +class real_typet : public typet { public: - real_typet():typet(ID_real) + real_typet() : typet(ID_real) { } }; /*! \brief A reference into the symbol table */ -class symbol_typet:public typet +class symbol_typet : public typet { public: - explicit symbol_typet(const irep_idt &identifier):typet(ID_symbol) + explicit symbol_typet(const irep_idt &identifier) : typet(ID_symbol) { set_identifier(identifier); } @@ -138,7 +145,7 @@ class symbol_typet:public typet */ inline const symbol_typet &to_symbol_type(const typet &type) { - PRECONDITION(type.id()==ID_symbol); + PRECONDITION(type.id() == ID_symbol); return static_cast(type); } @@ -147,7 +154,7 @@ inline const symbol_typet &to_symbol_type(const typet &type) */ inline symbol_typet &to_symbol_type(typet &type) { - PRECONDITION(type.id()==ID_symbol); + PRECONDITION(type.id() == ID_symbol); return static_cast(type); } @@ -159,14 +166,14 @@ inline bool can_cast_type(const typet &type) /*! \brief Base type of C structs and unions, and C++ classes */ -class struct_union_typet:public typet +class struct_union_typet : public typet { public: - explicit struct_union_typet(const irep_idt &_id):typet(_id) + explicit struct_union_typet(const irep_idt &_id) : typet(_id) { } - class componentt:public exprt + class componentt : public exprt { public: componentt() @@ -176,7 +183,7 @@ class struct_union_typet:public typet componentt(const irep_idt &_name, const typet &_type) { set_name(_name); - type()=_type; + type() = _type; } const irep_idt &get_name() const @@ -257,14 +264,19 @@ class struct_union_typet:public typet return get_component(component_name).is_not_nil(); } - const componentt &get_component( - const irep_idt &component_name) const; + const componentt &get_component(const irep_idt &component_name) const; std::size_t component_number(const irep_idt &component_name) const; typet component_type(const irep_idt &component_name) const; - irep_idt get_tag() const { return get(ID_tag); } - void set_tag(const irep_idt &tag) { set(ID_tag, tag); } + irep_idt get_tag() const + { + return get(ID_tag); + } + void set_tag(const irep_idt &tag) + { + set(ID_tag, tag); + } }; /*! \brief Cast a generic typet to a \ref struct_union_typet @@ -279,7 +291,7 @@ class struct_union_typet:public typet */ inline const struct_union_typet &to_struct_union_type(const typet &type) { - PRECONDITION(type.id()==ID_struct || type.id()==ID_union); + PRECONDITION(type.id() == ID_struct || type.id() == ID_union); return static_cast(type); } @@ -288,16 +300,16 @@ inline const struct_union_typet &to_struct_union_type(const typet &type) */ inline struct_union_typet &to_struct_union_type(typet &type) { - PRECONDITION(type.id()==ID_struct || type.id()==ID_union); + PRECONDITION(type.id() == ID_struct || type.id() == ID_union); return static_cast(type); } /*! \brief Structure type */ -class struct_typet:public struct_union_typet +class struct_typet : public struct_union_typet { public: - struct_typet():struct_union_typet(ID_struct) + struct_typet() : struct_union_typet(ID_struct) { } @@ -305,6 +317,13 @@ class struct_typet:public struct_union_typet bool is_prefix_of(const struct_typet &other) const; }; +/// This method tests, +/// if the given typet is a struct +inline bool is_struct(const typet &type) +{ + return type.id() == ID_struct; +} + /*! \brief Cast a generic typet to a \ref struct_typet * * This is an unchecked conversion. \a type must be known to be \ref @@ -317,7 +336,7 @@ class struct_typet:public struct_union_typet */ inline const struct_typet &to_struct_type(const typet &type) { - PRECONDITION(type.id()==ID_struct); + PRECONDITION(type.id() == ID_struct); return static_cast(type); } @@ -326,7 +345,7 @@ inline const struct_typet &to_struct_type(const typet &type) */ inline struct_typet &to_struct_type(typet &type) { - PRECONDITION(type.id()==ID_struct); + PRECONDITION(type.id() == ID_struct); return static_cast(type); } @@ -338,10 +357,10 @@ inline bool can_cast_type(const typet &type) /*! \brief C++ class type */ -class class_typet:public struct_typet +class class_typet : public struct_typet { public: - class_typet():struct_typet() + class_typet() : struct_typet() { set(ID_C_class, true); } @@ -366,17 +385,17 @@ class class_typet:public struct_typet irep_idt default_access() const { - return is_class()?ID_private:ID_public; + return is_class() ? ID_private : ID_public; } - class baset:public exprt + class baset : public exprt { public: - baset():exprt(ID_base) + baset() : exprt(ID_base) { } - explicit baset(const typet &base):exprt(ID_base, base) + explicit baset(const typet &base) : exprt(ID_base, base) { } }; @@ -434,7 +453,7 @@ class class_typet:public struct_typet */ inline const class_typet &to_class_type(const typet &type) { - PRECONDITION(type.id()==ID_struct); + PRECONDITION(type.id() == ID_struct); return static_cast(type); } @@ -443,7 +462,7 @@ inline const class_typet &to_class_type(const typet &type) */ inline class_typet &to_class_type(typet &type) { - PRECONDITION(type.id()==ID_struct); + PRECONDITION(type.id() == ID_struct); return static_cast(type); } @@ -455,10 +474,10 @@ inline bool can_cast_type(const typet &type) /*! \brief The union type */ -class union_typet:public struct_union_typet +class union_typet : public struct_union_typet { public: - union_typet():struct_union_typet(ID_union) + union_typet() : struct_union_typet(ID_union) { } }; @@ -475,7 +494,7 @@ class union_typet:public struct_union_typet */ inline const union_typet &to_union_type(const typet &type) { - PRECONDITION(type.id()==ID_union); + PRECONDITION(type.id() == ID_union); return static_cast(type); } @@ -484,19 +503,18 @@ inline const union_typet &to_union_type(const typet &type) */ inline union_typet &to_union_type(typet &type) { - PRECONDITION(type.id()==ID_union); + PRECONDITION(type.id() == ID_union); return static_cast(type); } /*! \brief A generic tag-based type */ -class tag_typet:public typet +class tag_typet : public typet { public: - explicit tag_typet( - const irep_idt &_id, - const irep_idt &identifier):typet(_id) + explicit tag_typet(const irep_idt &_id, const irep_idt &identifier) + : typet(_id) { set_identifier(identifier); } @@ -524,9 +542,9 @@ class tag_typet:public typet */ inline const tag_typet &to_tag_type(const typet &type) { - PRECONDITION(type.id()==ID_c_enum_tag || - type.id()==ID_struct_tag || - type.id()==ID_union_tag); + PRECONDITION( + type.id() == ID_c_enum_tag || type.id() == ID_struct_tag || + type.id() == ID_union_tag); return static_cast(type); } @@ -535,20 +553,20 @@ inline const tag_typet &to_tag_type(const typet &type) */ inline tag_typet &to_tag_type(typet &type) { - PRECONDITION(type.id()==ID_c_enum_tag || - type.id()==ID_struct_tag || - type.id()==ID_union_tag); + PRECONDITION( + type.id() == ID_c_enum_tag || type.id() == ID_struct_tag || + type.id() == ID_union_tag); return static_cast(type); } /*! \brief A struct tag type */ -class struct_tag_typet:public tag_typet +class struct_tag_typet : public tag_typet { public: - explicit struct_tag_typet(const irep_idt &identifier): - tag_typet(ID_struct_tag, identifier) + explicit struct_tag_typet(const irep_idt &identifier) + : tag_typet(ID_struct_tag, identifier) { } }; @@ -565,7 +583,7 @@ class struct_tag_typet:public tag_typet */ inline const struct_tag_typet &to_struct_tag_type(const typet &type) { - PRECONDITION(type.id()==ID_struct_tag); + PRECONDITION(type.id() == ID_struct_tag); return static_cast(type); } @@ -574,18 +592,18 @@ inline const struct_tag_typet &to_struct_tag_type(const typet &type) */ inline struct_tag_typet &to_struct_tag_type(typet &type) { - PRECONDITION(type.id()==ID_struct_tag); + PRECONDITION(type.id() == ID_struct_tag); return static_cast(type); } /*! \brief A union tag type */ -class union_tag_typet:public tag_typet +class union_tag_typet : public tag_typet { public: - explicit union_tag_typet(const irep_idt &identifier): - tag_typet(ID_union_tag, identifier) + explicit union_tag_typet(const irep_idt &identifier) + : tag_typet(ID_union_tag, identifier) { } }; @@ -602,7 +620,7 @@ class union_tag_typet:public tag_typet */ inline const union_tag_typet &to_union_tag_type(const typet &type) { - PRECONDITION(type.id()==ID_union_tag); + PRECONDITION(type.id() == ID_union_tag); return static_cast(type); } @@ -611,17 +629,17 @@ inline const union_tag_typet &to_union_tag_type(const typet &type) */ inline union_tag_typet &to_union_tag_type(typet &type) { - PRECONDITION(type.id()==ID_union_tag); + PRECONDITION(type.id() == ID_union_tag); return static_cast(type); } /*! \brief A generic enumeration type (not to be confused with C enums) */ -class enumeration_typet:public typet +class enumeration_typet : public typet { public: - enumeration_typet():typet(ID_enumeration) + enumeration_typet() : typet(ID_enumeration) { } @@ -648,7 +666,7 @@ class enumeration_typet:public typet */ inline const enumeration_typet &to_enumeration_type(const typet &type) { - PRECONDITION(type.id()==ID_enumeration); + PRECONDITION(type.id() == ID_enumeration); return static_cast(type); } @@ -657,32 +675,44 @@ inline const enumeration_typet &to_enumeration_type(const typet &type) */ inline enumeration_typet &to_enumeration_type(typet &type) { - PRECONDITION(type.id()==ID_enumeration); + PRECONDITION(type.id() == ID_enumeration); return static_cast(type); } /*! \brief The type of C enums */ -class c_enum_typet:public type_with_subtypet +class c_enum_typet : public type_with_subtypet { public: - explicit c_enum_typet(const typet &_subtype): - type_with_subtypet(ID_c_enum, _subtype) + explicit c_enum_typet(const typet &_subtype) + : type_with_subtypet(ID_c_enum, _subtype) { } - class c_enum_membert:public irept + class c_enum_membert : public irept { public: - irep_idt get_value() const { return get(ID_value); } - void set_value(const irep_idt &value) { set(ID_value, value); } - irep_idt get_identifier() const { return get(ID_identifier); } + irep_idt get_value() const + { + return get(ID_value); + } + void set_value(const irep_idt &value) + { + set(ID_value, value); + } + irep_idt get_identifier() const + { + return get(ID_identifier); + } void set_identifier(const irep_idt &identifier) { set(ID_identifier, identifier); } - irep_idt get_base_name() const { return get(ID_base_name); } + irep_idt get_base_name() const + { + return get(ID_base_name); + } void set_base_name(const irep_idt &base_name) { set(ID_base_name, base_name); @@ -709,7 +739,7 @@ class c_enum_typet:public type_with_subtypet */ inline const c_enum_typet &to_c_enum_type(const typet &type) { - PRECONDITION(type.id()==ID_c_enum); + PRECONDITION(type.id() == ID_c_enum); return static_cast(type); } @@ -718,18 +748,18 @@ inline const c_enum_typet &to_c_enum_type(const typet &type) */ inline c_enum_typet &to_c_enum_type(typet &type) { - PRECONDITION(type.id()==ID_c_enum); + PRECONDITION(type.id() == ID_c_enum); return static_cast(type); } /*! \brief An enum tag type */ -class c_enum_tag_typet:public tag_typet +class c_enum_tag_typet : public tag_typet { public: - explicit c_enum_tag_typet(const irep_idt &identifier): - tag_typet(ID_c_enum_tag, identifier) + explicit c_enum_tag_typet(const irep_idt &identifier) + : tag_typet(ID_c_enum_tag, identifier) { } }; @@ -746,7 +776,7 @@ class c_enum_tag_typet:public tag_typet */ inline const c_enum_tag_typet &to_c_enum_tag_type(const typet &type) { - PRECONDITION(type.id()==ID_c_enum_tag); + PRECONDITION(type.id() == ID_c_enum_tag); return static_cast(type); } @@ -755,13 +785,13 @@ inline const c_enum_tag_typet &to_c_enum_tag_type(const typet &type) */ inline c_enum_tag_typet &to_c_enum_tag_type(typet &type) { - PRECONDITION(type.id()==ID_c_enum_tag); + PRECONDITION(type.id() == ID_c_enum_tag); return static_cast(type); } /*! \brief Base type of functions */ -class code_typet:public typet +class code_typet : public typet { public: class parametert; @@ -788,7 +818,7 @@ class code_typet:public typet /// \deprecated DEPRECATED("Use the two argument constructor instead") - code_typet():typet(ID_code) + code_typet() : typet(ID_code) { // make sure these properties are always there to avoid problems // with irept comparisons @@ -798,14 +828,14 @@ class code_typet:public typet // used to be argumentt -- now uses standard terminology - class parametert:public exprt + class parametert : public exprt { public: - parametert():exprt(ID_parameter) + parametert() : exprt(ID_parameter) { } - explicit parametert(const typet &type):exprt(ID_parameter, type) + explicit parametert(const typet &type) : exprt(ID_parameter, type) { } @@ -870,7 +900,7 @@ class code_typet:public typet const parametert *get_this() const { - const parameterst &p=parameters(); + const parameterst &p = parameters(); if(!p.empty() && p.front().get_this()) return &p.front(); else @@ -946,10 +976,9 @@ class code_typet:public typet std::vector parameter_identifiers() const { std::vector result; - const parameterst &p=parameters(); + const parameterst &p = parameters(); result.reserve(p.size()); - for(parameterst::const_iterator it=p.begin(); - it!=p.end(); it++) + for(parameterst::const_iterator it = p.begin(); it != p.end(); it++) result.push_back(it->get_identifier()); return result; } @@ -967,7 +996,7 @@ class code_typet:public typet { const irep_idt &id = p.get_identifier(); if(!id.empty()) - parameter_indices.insert({ id, index }); + parameter_indices.insert({id, index}); ++index; } return parameter_indices; @@ -1007,17 +1036,15 @@ inline code_typet &to_code_type(typet &type) return static_cast(type); } - /*! \brief arrays with given size */ -class array_typet:public type_with_subtypet +class array_typet : public type_with_subtypet { public: - array_typet( - const typet &_subtype, - const exprt &_size):type_with_subtypet(ID_array, _subtype) + array_typet(const typet &_subtype, const exprt &_size) + : type_with_subtypet(ID_array, _subtype) { - size()=_size; + size() = _size; } const exprt &size() const @@ -1040,6 +1067,12 @@ class array_typet:public type_with_subtypet return size().is_nil(); } }; +/// This method tests, +/// if the given typet is an array_typet +inline bool is_array(const typet &type) +{ + return type.id() == ID_array; +} /*! \brief Cast a generic typet to an \ref array_typet * @@ -1053,7 +1086,7 @@ class array_typet:public type_with_subtypet */ inline const array_typet &to_array_type(const typet &type) { - PRECONDITION(type.id()==ID_array); + PRECONDITION(type.id() == ID_array); return static_cast(type); } @@ -1062,21 +1095,21 @@ inline const array_typet &to_array_type(const typet &type) */ inline array_typet &to_array_type(typet &type) { - PRECONDITION(type.id()==ID_array); + PRECONDITION(type.id() == ID_array); return static_cast(type); } /*! \brief arrays without size */ -class incomplete_array_typet:public type_with_subtypet +class incomplete_array_typet : public type_with_subtypet { public: - incomplete_array_typet():type_with_subtypet(ID_incomplete_array) + incomplete_array_typet() : type_with_subtypet(ID_incomplete_array) { } - explicit incomplete_array_typet(const typet &_subtype): - type_with_subtypet(ID_array, _subtype) + explicit incomplete_array_typet(const typet &_subtype) + : type_with_subtypet(ID_array, _subtype) { } }; @@ -1093,7 +1126,7 @@ class incomplete_array_typet:public type_with_subtypet */ inline const incomplete_array_typet &to_incomplete_array_type(const typet &type) { - PRECONDITION(type.id()==ID_array); + PRECONDITION(type.id() == ID_array); return static_cast(type); } @@ -1102,35 +1135,32 @@ inline const incomplete_array_typet &to_incomplete_array_type(const typet &type) */ inline incomplete_array_typet &to_incomplete_array_type(typet &type) { - PRECONDITION(type.id()==ID_array); + PRECONDITION(type.id() == ID_array); return static_cast(type); } /*! \brief Base class of bitvector types */ -class bitvector_typet:public type_with_subtypet +class bitvector_typet : public type_with_subtypet { public: - explicit bitvector_typet(const irep_idt &_id):type_with_subtypet(_id) + explicit bitvector_typet(const irep_idt &_id) : type_with_subtypet(_id) { } - bitvector_typet(const irep_idt &_id, const typet &_subtype): - type_with_subtypet(_id, _subtype) + bitvector_typet(const irep_idt &_id, const typet &_subtype) + : type_with_subtypet(_id, _subtype) { } - bitvector_typet( - const irep_idt &_id, - const typet &_subtype, - std::size_t width): - type_with_subtypet(_id, _subtype) + bitvector_typet(const irep_idt &_id, const typet &_subtype, std::size_t width) + : type_with_subtypet(_id, _subtype) { set_width(width); } - bitvector_typet(const irep_idt &_id, std::size_t width): - type_with_subtypet(_id) + bitvector_typet(const irep_idt &_id, std::size_t width) + : type_with_subtypet(_id) { set_width(width); } @@ -1146,6 +1176,13 @@ class bitvector_typet:public type_with_subtypet } }; +/// This method tests, +/// if the given typet is a signed or unsigned bitvector. +inline bool is_signed_or_unsigned_bitvector(const typet &type) +{ + return type.id() == ID_signedbv || type.id() == ID_unsignedbv; +} + /*! \brief Cast a generic typet to a \ref bitvector_typet * * This is an unchecked conversion. \a type must be known to be \ref @@ -1158,42 +1195,34 @@ class bitvector_typet:public type_with_subtypet */ inline const bitvector_typet &to_bitvector_type(const typet &type) { - PRECONDITION(type.id()==ID_signedbv || - type.id()==ID_unsignedbv || - type.id()==ID_fixedbv || - type.id()==ID_floatbv || - type.id()==ID_verilog_signedbv || - type.id()==ID_verilog_unsignedbv || - type.id()==ID_bv || - type.id()==ID_pointer || - type.id()==ID_c_bit_field || - type.id()==ID_c_bool); + PRECONDITION( + type.id() == ID_signedbv || type.id() == ID_unsignedbv || + type.id() == ID_fixedbv || type.id() == ID_floatbv || + type.id() == ID_verilog_signedbv || type.id() == ID_verilog_unsignedbv || + type.id() == ID_bv || type.id() == ID_pointer || + type.id() == ID_c_bit_field || type.id() == ID_c_bool); return static_cast(type); } inline bitvector_typet &to_bitvector_type(typet &type) { - PRECONDITION(type.id()==ID_signedbv || - type.id()==ID_unsignedbv || - type.id()==ID_fixedbv || - type.id()==ID_floatbv || - type.id()==ID_verilog_signedbv || - type.id()==ID_verilog_unsignedbv || - type.id()==ID_bv || - type.id()==ID_pointer || - type.id()==ID_c_bit_field || - type.id()==ID_c_bool); + PRECONDITION( + type.id() == ID_signedbv || type.id() == ID_unsignedbv || + type.id() == ID_fixedbv || type.id() == ID_floatbv || + type.id() == ID_verilog_signedbv || type.id() == ID_verilog_unsignedbv || + type.id() == ID_bv || type.id() == ID_pointer || + type.id() == ID_c_bit_field || type.id() == ID_c_bool); return static_cast(type); } /*! \brief fixed-width bit-vector without numerical interpretation */ -class bv_typet:public bitvector_typet +class bv_typet : public bitvector_typet { public: - explicit bv_typet(std::size_t width):bitvector_typet(ID_bv) + explicit bv_typet(std::size_t width) : bitvector_typet(ID_bv) { set_width(width); } @@ -1211,7 +1240,7 @@ class bv_typet:public bitvector_typet */ inline const bv_typet &to_bv_type(const typet &type) { - PRECONDITION(type.id()==ID_bv); + PRECONDITION(type.id() == ID_bv); return static_cast(type); } @@ -1220,17 +1249,17 @@ inline const bv_typet &to_bv_type(const typet &type) */ inline bv_typet &to_bv_type(typet &type) { - PRECONDITION(type.id()==ID_bv); + PRECONDITION(type.id() == ID_bv); return static_cast(type); } /*! \brief Fixed-width bit-vector with unsigned binary interpretation */ -class unsignedbv_typet:public bitvector_typet +class unsignedbv_typet : public bitvector_typet { public: - explicit unsignedbv_typet(std::size_t width): - bitvector_typet(ID_unsignedbv, width) + explicit unsignedbv_typet(std::size_t width) + : bitvector_typet(ID_unsignedbv, width) { } @@ -1253,7 +1282,7 @@ class unsignedbv_typet:public bitvector_typet */ inline const unsignedbv_typet &to_unsignedbv_type(const typet &type) { - PRECONDITION(type.id()==ID_unsignedbv); + PRECONDITION(type.id() == ID_unsignedbv); return static_cast(type); } @@ -1262,17 +1291,17 @@ inline const unsignedbv_typet &to_unsignedbv_type(const typet &type) */ inline unsignedbv_typet &to_unsignedbv_type(typet &type) { - PRECONDITION(type.id()==ID_unsignedbv); + PRECONDITION(type.id() == ID_unsignedbv); return static_cast(type); } /*! \brief Fixed-width bit-vector with two's complement interpretation */ -class signedbv_typet:public bitvector_typet +class signedbv_typet : public bitvector_typet { public: - explicit signedbv_typet(std::size_t width): - bitvector_typet(ID_signedbv, width) + explicit signedbv_typet(std::size_t width) + : bitvector_typet(ID_signedbv, width) { } @@ -1295,7 +1324,7 @@ class signedbv_typet:public bitvector_typet */ inline const signedbv_typet &to_signedbv_type(const typet &type) { - PRECONDITION(type.id()==ID_signedbv); + PRECONDITION(type.id() == ID_signedbv); return static_cast(type); } @@ -1304,22 +1333,22 @@ inline const signedbv_typet &to_signedbv_type(const typet &type) */ inline signedbv_typet &to_signedbv_type(typet &type) { - PRECONDITION(type.id()==ID_signedbv); + PRECONDITION(type.id() == ID_signedbv); return static_cast(type); } /*! \brief Fixed-width bit-vector with signed fixed-point interpretation */ -class fixedbv_typet:public bitvector_typet +class fixedbv_typet : public bitvector_typet { public: - fixedbv_typet():bitvector_typet(ID_fixedbv) + fixedbv_typet() : bitvector_typet(ID_fixedbv) { } std::size_t get_fraction_bits() const { - return get_width()-get_integer_bits(); + return get_width() - get_integer_bits(); } std::size_t get_integer_bits() const; @@ -1342,23 +1371,23 @@ class fixedbv_typet:public bitvector_typet */ inline const fixedbv_typet &to_fixedbv_type(const typet &type) { - PRECONDITION(type.id()==ID_fixedbv); + PRECONDITION(type.id() == ID_fixedbv); return static_cast(type); } /*! \brief Fixed-width bit-vector with IEEE floating-point interpretation */ -class floatbv_typet:public bitvector_typet +class floatbv_typet : public bitvector_typet { public: - floatbv_typet():bitvector_typet(ID_floatbv) + floatbv_typet() : bitvector_typet(ID_floatbv) { } std::size_t get_e() const { // subtract one for sign bit - return get_width()-get_f()-1; + return get_width() - get_f() - 1; } std::size_t get_f() const; @@ -1381,17 +1410,17 @@ class floatbv_typet:public bitvector_typet */ inline const floatbv_typet &to_floatbv_type(const typet &type) { - PRECONDITION(type.id()==ID_floatbv); + PRECONDITION(type.id() == ID_floatbv); return static_cast(type); } /*! \brief Type for c bit fields */ -class c_bit_field_typet:public bitvector_typet +class c_bit_field_typet : public bitvector_typet { public: - explicit c_bit_field_typet(const typet &subtype, std::size_t width): - bitvector_typet(ID_c_bit_field, subtype, width) + explicit c_bit_field_typet(const typet &subtype, std::size_t width) + : bitvector_typet(ID_c_bit_field, subtype, width) { } @@ -1410,7 +1439,7 @@ class c_bit_field_typet:public bitvector_typet */ inline const c_bit_field_typet &to_c_bit_field_type(const typet &type) { - PRECONDITION(type.id()==ID_c_bit_field); + PRECONDITION(type.id() == ID_c_bit_field); return static_cast(type); } @@ -1426,17 +1455,17 @@ inline const c_bit_field_typet &to_c_bit_field_type(const typet &type) */ inline c_bit_field_typet &to_c_bit_field_type(typet &type) { - PRECONDITION(type.id()==ID_c_bit_field); + PRECONDITION(type.id() == ID_c_bit_field); return static_cast(type); } /*! \brief The pointer type */ -class pointer_typet:public bitvector_typet +class pointer_typet : public bitvector_typet { public: - pointer_typet(const typet &_subtype, std::size_t width): - bitvector_typet(ID_pointer, _subtype, width) + pointer_typet(const typet &_subtype, std::size_t width) + : bitvector_typet(ID_pointer, _subtype, width) { } @@ -1458,7 +1487,7 @@ class pointer_typet:public bitvector_typet */ inline const pointer_typet &to_pointer_type(const typet &type) { - PRECONDITION(type.id()==ID_pointer); + PRECONDITION(type.id() == ID_pointer); const pointer_typet &ret = static_cast(type); validate_type(ret); return ret; @@ -1469,7 +1498,7 @@ inline const pointer_typet &to_pointer_type(const typet &type) */ inline pointer_typet &to_pointer_type(typet &type) { - PRECONDITION(type.id()==ID_pointer); + PRECONDITION(type.id() == ID_pointer); pointer_typet &ret = static_cast(type); validate_type(ret); return ret; @@ -1487,13 +1516,27 @@ inline void validate_type(const pointer_typet &type) DATA_INVARIANT(type.get_width() > 0, "pointer must have non-zero width"); } +/// This method tests, +/// if the given typet is a pointer. +inline bool is_pointer(const typet &type) +{ + return type.id() == ID_pointer; +} + +/// This method tests, +/// if the given typet is a pointer of type void. +inline bool is_void_pointer(const typet &type) +{ + return is_pointer(type) && type.subtype().id() == ID_empty; +} + /*! \brief The reference type */ -class reference_typet:public pointer_typet +class reference_typet : public pointer_typet { public: - reference_typet(const typet &_subtype, std::size_t _width): - pointer_typet(_subtype, _width) + reference_typet(const typet &_subtype, std::size_t _width) + : pointer_typet(_subtype, _width) { set(ID_C_reference, true); } @@ -1511,7 +1554,7 @@ class reference_typet:public pointer_typet */ inline const reference_typet &to_reference_type(const typet &type) { - PRECONDITION(type.id()==ID_pointer && type.get_bool(ID_C_reference)); + PRECONDITION(type.id() == ID_pointer && type.get_bool(ID_C_reference)); PRECONDITION(!type.get(ID_width).empty()); return static_cast(type); } @@ -1521,7 +1564,7 @@ inline const reference_typet &to_reference_type(const typet &type) */ inline reference_typet &to_reference_type(typet &type) { - PRECONDITION(type.id()==ID_pointer && type.get_bool(ID_C_reference)); + PRECONDITION(type.id() == ID_pointer && type.get_bool(ID_C_reference)); PRECONDITION(!type.get(ID_width).empty()); return static_cast(type); } @@ -1535,15 +1578,14 @@ bool is_rvalue_reference(const typet &type); /*! \brief The C/C++ Booleans */ -class c_bool_typet:public bitvector_typet +class c_bool_typet : public bitvector_typet { public: - c_bool_typet():bitvector_typet(ID_c_bool) + c_bool_typet() : bitvector_typet(ID_c_bool) { } - explicit c_bool_typet(std::size_t width): - bitvector_typet(ID_c_bool, width) + explicit c_bool_typet(std::size_t width) : bitvector_typet(ID_c_bool, width) { } }; @@ -1560,7 +1602,7 @@ class c_bool_typet:public bitvector_typet */ inline const c_bool_typet &to_c_bool_type(const typet &type) { - PRECONDITION(type.id()==ID_c_bool); + PRECONDITION(type.id() == ID_c_bool); return static_cast(type); } @@ -1569,16 +1611,16 @@ inline const c_bool_typet &to_c_bool_type(const typet &type) */ inline c_bool_typet &to_c_bool_type(typet &type) { - PRECONDITION(type.id()==ID_c_bool); + PRECONDITION(type.id() == ID_c_bool); return static_cast(type); } /*! \brief TO_BE_DOCUMENTED */ -class string_typet:public typet +class string_typet : public typet { public: - string_typet():typet(ID_string) + string_typet() : typet(ID_string) { } }; @@ -1595,13 +1637,13 @@ class string_typet:public typet */ inline const string_typet &to_string_type(const typet &type) { - PRECONDITION(type.id()==ID_string); + PRECONDITION(type.id() == ID_string); return static_cast(type); } /*! \brief A type for subranges of integers */ -class range_typet:public typet +class range_typet : public typet { public: range_typet(const mp_integer &_from, const mp_integer &_to) @@ -1629,20 +1671,19 @@ class range_typet:public typet */ inline const range_typet &to_range_type(const typet &type) { - PRECONDITION(type.id()==ID_range); + PRECONDITION(type.id() == ID_range); return static_cast(type); } /*! \brief A constant-size array type */ -class vector_typet:public type_with_subtypet +class vector_typet : public type_with_subtypet { public: - vector_typet( - const typet &_subtype, - const exprt &_size):type_with_subtypet(ID_vector, _subtype) + vector_typet(const typet &_subtype, const exprt &_size) + : type_with_subtypet(ID_vector, _subtype) { - size()=_size; + size() = _size; } const exprt &size() const @@ -1668,7 +1709,7 @@ class vector_typet:public type_with_subtypet */ inline const vector_typet &to_vector_type(const typet &type) { - PRECONDITION(type.id()==ID_vector); + PRECONDITION(type.id() == ID_vector); return static_cast(type); } @@ -1677,21 +1718,21 @@ inline const vector_typet &to_vector_type(const typet &type) */ inline vector_typet &to_vector_type(typet &type) { - PRECONDITION(type.id()==ID_vector); + PRECONDITION(type.id() == ID_vector); return static_cast(type); } /*! \brief Complex numbers made of pair of given subtype */ -class complex_typet:public type_with_subtypet +class complex_typet : public type_with_subtypet { public: - complex_typet():type_with_subtypet(ID_complex) + complex_typet() : type_with_subtypet(ID_complex) { } - explicit complex_typet(const typet &_subtype): - type_with_subtypet(ID_complex, _subtype) + explicit complex_typet(const typet &_subtype) + : type_with_subtypet(ID_complex, _subtype) { } }; @@ -1708,7 +1749,7 @@ class complex_typet:public type_with_subtypet */ inline const complex_typet &to_complex_type(const typet &type) { - PRECONDITION(type.id()==ID_complex); + PRECONDITION(type.id() == ID_complex); return static_cast(type); } @@ -1717,19 +1758,19 @@ inline const complex_typet &to_complex_type(const typet &type) */ inline complex_typet &to_complex_type(typet &type) { - PRECONDITION(type.id()==ID_complex); + PRECONDITION(type.id() == ID_complex); return static_cast(type); } /*! \brief A type for mathematical functions (do not confuse with functions/methods in code) */ -class mathematical_function_typet:public typet +class mathematical_function_typet : public typet { public: // the domain of the function is composed of zero, one, or // many variables - class variablet:public irept + class variablet : public irept { public: // the identifier is optional @@ -1754,7 +1795,7 @@ class mathematical_function_typet:public typet } }; - using domaint=std::vector; + using domaint = std::vector; mathematical_function_typet(const domaint &_domain, const typet &_codomain) : typet(ID_mathematical_function) @@ -1776,7 +1817,7 @@ class mathematical_function_typet:public typet variablet &add_variable() { - auto &d=domain(); + auto &d = domain(); d.push_back(variablet()); return d.back(); } @@ -1805,19 +1846,18 @@ class mathematical_function_typet:public typet * \ingroup gr_std_types */ inline const mathematical_function_typet & - to_mathematical_function_type(const typet &type) +to_mathematical_function_type(const typet &type) { - PRECONDITION(type.id()==ID_mathematical_function); + PRECONDITION(type.id() == ID_mathematical_function); return static_cast(type); } /*! \copydoc to_mathematical_function_type(const typet &) * \ingroup gr_std_types */ -inline mathematical_function_typet & - to_mathematical_function_type(typet &type) +inline mathematical_function_typet &to_mathematical_function_type(typet &type) { - PRECONDITION(type.id()==ID_mathematical_function); + PRECONDITION(type.id() == ID_mathematical_function); return static_cast(type); } From 66c1db17397d47e97ba56d81fc00fbab5031a132 Mon Sep 17 00:00:00 2001 From: Malte Mues Date: Thu, 12 Jul 2018 10:22:16 -0400 Subject: [PATCH 2/7] Add an api to analyze a core dump with gdb Applying CBMC on large code bases requires sometimes to model a test environment. Running a program until a certain point and let it crash, allows to analyze the memory state at this point in time. In continuation, the memory state might be reconstructed as base for the test environment model. By using gdb to analyze the core dump, I don't have to take care of reading and interpreting the core dump myself. --- src/memory-analyzer/gdb_api.cpp | 262 +++++++++++++++++++++++++++++++ src/memory-analyzer/gdb_api.h | 76 +++++++++ unit/memory-analyzer/gdb_api.cpp | 235 +++++++++++++++++++++++++++ 3 files changed, 573 insertions(+) create mode 100644 src/memory-analyzer/gdb_api.cpp create mode 100644 src/memory-analyzer/gdb_api.h create mode 100644 unit/memory-analyzer/gdb_api.cpp diff --git a/src/memory-analyzer/gdb_api.cpp b/src/memory-analyzer/gdb_api.cpp new file mode 100644 index 00000000000..5c8d55aa22f --- /dev/null +++ b/src/memory-analyzer/gdb_api.cpp @@ -0,0 +1,262 @@ +// Copyright 2018 Author: Malte Mues +#ifdef __linux__ +#include +#include +#include +#include +#include + +#include "gdb_api.h" +#include + +gdb_apit::gdb_apit(const char *arg) + : binary_name(arg), buffer_position(0), last_read_size(0) +{ + memset(buffer, 0, MAX_READ_SIZE_GDB_BUFFER); +} + +int gdb_apit::create_gdb_process() +{ + if(pipe(pipe_input) == -1) + { + throw gdb_interaction_exceptiont("could not create pipe for stdin!"); + } + if(pipe(pipe_output) == -1) + { + throw gdb_interaction_exceptiont("could not create pipe for stdout!"); + } + + gdb_process = fork(); + if(gdb_process == -1) + { + throw gdb_interaction_exceptiont("could not create gdb process."); + } + if(gdb_process == 0) + { + // child process + close(pipe_input[1]); + close(pipe_output[0]); + dup2(pipe_input[0], STDIN_FILENO); + dup2(pipe_output[1], STDOUT_FILENO); + dup2(pipe_output[1], STDERR_FILENO); + dprintf(pipe_output[1], "binary name: %s\n", binary_name); + char *arg[] = { + const_cast("gdb"), const_cast(binary_name), NULL}; + + dprintf(pipe_output[1], "Loading gdb...\n"); + execvp("gdb", arg); + + // Only reachable, if execvp failed + int errno_value = errno; + dprintf(pipe_output[1], "errno in child: %s\n", strerror(errno_value)); + } + else + { + // parent process + close(pipe_input[0]); + close(pipe_output[1]); + write_to_gdb("set max-value-size unlimited\n"); + } + return 0; +} + +void gdb_apit::terminate_gdb_process() +{ + close(pipe_input[0]); + close(pipe_input[1]); +} + +void gdb_apit::write_to_gdb(const std::string &command) +{ + if( + write(pipe_input[1], command.c_str(), command.length()) != + (ssize_t)command.length()) + { + throw gdb_interaction_exceptiont("Could not write a command to gdb"); + } +} + +std::string gdb_apit::read_next_line() +{ + char token; + std::string line; + do + { + while(buffer_position >= last_read_size) + { + read_next_buffer_chunc(); + } + token = buffer[buffer_position]; + line += token; + ++buffer_position; + } while(token != '\n'); + return line; +} + +void gdb_apit::read_next_buffer_chunc() +{ + memset(buffer, 0, last_read_size); + const auto read_size = + read(pipe_output[0], &buffer, MAX_READ_SIZE_GDB_BUFFER); + if(read_size < 0) + { + throw gdb_interaction_exceptiont("Error reading from gdb_process"); + } + last_read_size = read_size; + buffer_position = 0; +} + +void gdb_apit::run_gdb_from_core(const std::string &corefile) +{ + const std::string command = "core " + corefile + "\n"; + write_to_gdb(command); + std::string line; + while(!isdigit(line[0])) + { + line = read_next_line(); + if(check_for_gdb_core_error(line)) + { + terminate_gdb_process(); + throw gdb_interaction_exceptiont( + "This core file doesn't work with the binary."); + } + } +} + +bool gdb_apit::check_for_gdb_core_error(const std::string &line) +{ + const std::regex core_init_error_r("exec file is newer than core"); + return regex_search(line, core_init_error_r); +} + +void gdb_apit::run_gdb_to_breakpoint(const std::string &breakpoint) +{ + std::string command = "break " + breakpoint + "\n"; + write_to_gdb(command); + command = "run\n"; + write_to_gdb(command); + std::string line; + while(!isdigit(line[0])) + { + line = read_next_line(); + if(check_for_gdb_breakpoint_error(line)) + { + terminate_gdb_process(); + throw gdb_interaction_exceptiont("This is not a valid breakpoint\n"); + } + } +} + +bool gdb_apit::check_for_gdb_breakpoint_error(const std::string &line) +{ + const std::regex breakpoint_init_error_r("Make breakpoint pending on future"); + return regex_search(line, breakpoint_init_error_r); +} + +std::string gdb_apit::create_command(const std::string &variable) +{ + return "p " + variable + "\n"; +} + +std::string gdb_apit::get_memory(const std::string &variable) +{ + write_to_gdb(create_command(variable)); + const std::string &response = read_next_line(); + return extract_memory(response); +} + +// All lines in the output start with something like +// '$XX = ' where XX is a digit. => shared_prefix. +const std::string shared_prefix = R"(\$[0-9]+\s=\s)"; + +// Matching a hex encoded address. +const std::string memory_address = R"(0x[[:xdigit:]]+)"; + +std::string gdb_apit::extract_memory(const std::string &line) +{ + // The next patterns matches the type information, + // which be "(classifier struct name (*)[XX])" + // for pointer to struct arrayes. classsifier and struct is optional => {1,3} + // If it isn't an array, than the ending is " *)" + // => or expression in pointer_star_or_array_suffix. + const std::string struct_name = R"((?:\S+\s){1,3})"; + const std::string pointer_star_or_arary_suffix = + R"((?:\*|(?:\*)?\(\*\)\[[0-9]+\])?)"; + const std::string pointer_type_info = + R"(\()" + struct_name + pointer_star_or_arary_suffix + R"(\))"; + + // The pointer type info is followed by the memory value and eventually + // extended by the name of the pointer content, if gdb has an explicit symbol. + // See unit test cases for more examples of expected input. + const std::regex pointer_pattern( + shared_prefix + pointer_type_info + R"(\s()" + memory_address + + R"()(\s\S+)?)"); + const std::regex null_pointer_pattern(shared_prefix + R"((0x0))"); + // Char pointer output the memory adress followed by the string in there. + const std::regex char_pointer_pattern( + shared_prefix + R"(()" + memory_address + R"()\s"[\S[:s:]]*")"); + + std::smatch result; + if(regex_search(line, result, char_pointer_pattern)) + { + return result[1]; + } + if(regex_search(line, result, pointer_pattern)) + { + return result[1]; + } + if(regex_search(line, result, null_pointer_pattern)) + { + return result[1]; + } + throw gdb_interaction_exceptiont("Cannot resolve memory_address: " + line); +} + +std::string gdb_apit::get_value(const std::string &variable) +{ + write_to_gdb(create_command(variable)); + const std::string &response = read_next_line(); + return extract_value(response); +} + +std::string gdb_apit::extract_value(const std::string &line) +{ + // This pattern matches primitive int values and bools. + const std::regex value_pattern(shared_prefix + R"(((?:-)?\d+|true|false))"); + // This pattern matches the char pointer content. + // It is printed behind the address. + const std::regex char_value_pattern( + shared_prefix + memory_address + "\\s\"([\\S ]*)\""); + // An enum entry just prints the name of the value on the commandline. + const std::regex enum_value_pattern(shared_prefix + R"(([\S]+)(?:\n|$))"); + // A void pointer outputs it type first, followed by the memory address it + // is pointing to. This pattern should extract the address. + const std::regex void_pointer_pattern( + shared_prefix + R"((?:[\S\s]+)\s()" + memory_address + ")"); + + std::smatch result; + if(regex_search(line, result, char_value_pattern)) + { + return result[1]; + } + if(regex_search(line, result, value_pattern)) + { + return result[1]; + } + if(regex_search(line, result, enum_value_pattern)) + { + return result[1]; + } + if(regex_search(line, result, void_pointer_pattern)) + { + return result[1]; + } + std::regex memmory_access_error("Cannot access memory"); + if(regex_search(line, memmory_access_error)) + { + throw gdb_inaccessible_memoryt("ERROR: " + line); + } + throw gdb_interaction_exceptiont("Cannot extract value from this: " + line); +} + +#endif diff --git a/src/memory-analyzer/gdb_api.h b/src/memory-analyzer/gdb_api.h new file mode 100644 index 00000000000..b9e3d915b9c --- /dev/null +++ b/src/memory-analyzer/gdb_api.h @@ -0,0 +1,76 @@ +// Copyright 2018 Author: Malte Mues +#ifdef __linux__ +#ifndef CPROVER_MEMORY_ANALYZER_GDB_API_H +#define CPROVER_MEMORY_ANALYZER_GDB_API_H +#include + +#include + +class namespacet; +class symbolt; +class irept; + +class gdb_apit +{ +public: + explicit gdb_apit(const char *binary); + + int create_gdb_process(); + void terminate_gdb_process(); + + void run_gdb_to_breakpoint(const std::string &breakpoint); + void run_gdb_from_core(const std::string &corefile); + + std::string get_value(const std::string &variable); + std::string get_memory(const std::string &variable); + +private: + static const int MAX_READ_SIZE_GDB_BUFFER = 600; + + const char *binary_name; + char buffer[MAX_READ_SIZE_GDB_BUFFER]; + int buffer_position; + pid_t gdb_process; + int last_read_size; + int pipe_input[2]; + int pipe_output[2]; + + static std::string create_command(const std::string &variable); + void write_to_gdb(const std::string &command); + + std::string read_next_line(); + void read_next_buffer_chunc(); + + static bool check_for_gdb_breakpoint_error(const std::string &line); + static bool check_for_gdb_core_error(const std::string &line); + + static std::string extract_value(const std::string &line); + static std::string extract_memory(const std::string &line); +}; + +class gdb_interaction_exceptiont : public std::exception +{ +public: + explicit gdb_interaction_exceptiont(std::string reason) : std::exception() + { + error = reason; + } + const char *what() const throw() + { + return error.c_str(); + } + +private: + std::string error; +}; + +class gdb_inaccessible_memoryt : public gdb_interaction_exceptiont +{ +public: + explicit gdb_inaccessible_memoryt(std::string reason) + : gdb_interaction_exceptiont(reason) + { + } +}; +#endif +#endif diff --git a/unit/memory-analyzer/gdb_api.cpp b/unit/memory-analyzer/gdb_api.cpp new file mode 100644 index 00000000000..80547419040 --- /dev/null +++ b/unit/memory-analyzer/gdb_api.cpp @@ -0,0 +1,235 @@ +// Copyright 2018 Malte Mues +#include + +#ifdef __linux__ +// \file Test that the regex expression used work as expected. +#define private public +#include +#include + +SCENARIO( + "gdb_apit uses regex as expected for memory", + "[core][memory-analyzer]") +{ + gdb_apit gdb_api(""); + GIVEN("The result of a struct pointer") + { + const std::string line = "$2 = (struct cycle_buffer *) 0x601050 "; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x601050"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x601050"); + } + } + + GIVEN("The result of a typedef pointer") + { + const std::string line = "$4 = (cycle_buffer_entry_t *) 0x602010"; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x602010"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x602010"); + } + } + + GIVEN("The result of a char pointer") + { + const std::string line = "$5 = 0x400734 \"snow\""; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x400734"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x400734"); + } + } + + GIVEN("The result of a null pointer") + { + const std::string line = "$2 = 0x0"; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x0"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x0"); + } + } + + GIVEN("The result of a char pointer at very low address") + { + const std::string line = "$34 = 0x007456 \"snow\""; + THEN("the result matches the memory address and not nullpointer") + { + REQUIRE(gdb_api.extract_memory(line) == "0x007456"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x007456"); + } + } + + GIVEN("The result of a char pointer with some more whitespaces") + { + const std::string line = "$12 = 0x400752 \"thunder storm\"\n"; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x400752"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x400752"); + } + } + + GIVEN("The result of an array pointer") + { + const std::string line = "$5 = (struct a_sub_type_t (*)[4]) 0x602080"; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x602080"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x602080"); + } + } + + GIVEN("A constant struct pointer pointing to 0x0") + { + const std::string line = "$3 = (const struct name *) 0x0"; + THEN("the returned memory address should be 0x0") + { + REQUIRE(gdb_api.extract_memory(line) == "0x0"); + } + } + + GIVEN("An enum address") + { + const std::string line = "$2 = (char *(*)[5]) 0x7e5500 "; + THEN("the returned address is the address of the enum") + { + REQUIRE(gdb_api.extract_memory(line) == "0x7e5500"); + } + } + + GIVEN("The result of an int pointer") + { + const std::string line = "$1 = (int *) 0x601088 \n"; + THEN("the result is the value of memory address of the int pointer") + { + REQUIRE(gdb_api.extract_memory(line) == "0x601088"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x601088"); + } + } + + GIVEN("Non matching result") + { + const std::string line = "Something that must not match 0x605940"; + THEN("an exception is thrown") + { + REQUIRE_THROWS_AS( + gdb_api.extract_memory(line), gdb_interaction_exceptiont); + } + } +} + +SCENARIO( + "gdb_apit uses regex as expected for value extraction", + "[core][memory-analyzer]") +{ + gdb_apit gdb_api(""); + GIVEN("An integer value") + { + const std::string line = "$90 = 100"; + THEN("the result schould be '100'") + { + REQUIRE(gdb_api.extract_value(line) == "100"); + } + } + + GIVEN("A string value") + { + const std::string line = "$5 = 0x602010 \"snow\""; + THEN("the result should be 'snow'") + { + REQUIRE(gdb_api.extract_value(line) == "snow"); + } + } + + GIVEN("A string with withe spaces") + { + const std::string line = "$1323 = 0x1243253 \"thunder storm\"\n"; + THEN("the result should be 'thunder storm'") + { + REQUIRE(gdb_api.extract_value(line) == "thunder storm"); + } + } + + GIVEN("A byte value") + { + const std::string line = "$2 = 243 '\363'"; + THEN("the result should be 243") + { + REQUIRE(gdb_api.extract_value(line) == "243"); + } + } + + GIVEN("A negative int value") + { + const std::string line = "$8 = -32"; + THEN("the result should be -32") + { + REQUIRE(gdb_api.extract_value(line) == "-32"); + } + } + + GIVEN("An enum value") + { + const std::string line = "$1 = INFO"; + THEN("the result should be INFO") + { + REQUIRE(gdb_api.extract_value(line) == "INFO"); + } + } + + GIVEN("A void pointer value") + { + const std::string line = "$6 = (const void *) 0x71"; + THEN("the requried result should be 0x71") + { + REQUIRE(gdb_api.extract_value(line) == "0x71"); + } + } + + GIVEN("A gdb response that contains 'cannot access memory'") + { + const std::string line = "Cannot access memory at address 0x71"; + THEN("a gdb_inaccesible_memoryt excepition must be raised") + { + REQUIRE_THROWS_AS(gdb_api.extract_value(line), gdb_inaccessible_memoryt); + } + } + + GIVEN("A value that must not match") + { + const std::string line = "$90 = must not match 20"; + THEN("an exception is raised") + { + REQUIRE_THROWS_AS( + gdb_api.extract_value(line), gdb_interaction_exceptiont); + } + } +} +#endif From 5165d05d3fda32f94651862736d29dc4069c7063 Mon Sep 17 00:00:00 2001 From: Malte Mues Date: Fri, 27 Jul 2018 16:03:31 -0400 Subject: [PATCH 3/7] Add constants for the integer base system On multiple locations in the code base, magic numbers are used to mark the base of an intenger conversion. This commit introduces constants and replaces the magic numbers in some places. --- src/util/mp_arith.h | 12 ++++++++++-- src/util/string2int.h | 23 +++++++++++++++-------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/util/mp_arith.h b/src/util/mp_arith.h index d20bb33f29a..ec88a7e3abd 100644 --- a/src/util/mp_arith.h +++ b/src/util/mp_arith.h @@ -18,6 +18,11 @@ Author: Daniel Kroening, kroening@kroening.com #include "optional.h" #include "deprecate.h" +const int DECIMAL_SYSTEM = 10; +const int OCTAL_SYSTEM = 8; +const int BINARY_SYSTEM = 2; +const int HEXADECIMAL_SYSTEM = 16; + // NOLINTNEXTLINE(readability/identifiers) typedef BigInt mp_integer; @@ -47,8 +52,11 @@ mp_integer rotate_right( mp_integer rotate_left( const mp_integer &, const mp_integer &, std::size_t true_size); -const std::string integer2string(const mp_integer &, unsigned base=10); -const mp_integer string2integer(const std::string &, unsigned base=10); +const std::string +integer2string(const mp_integer &, unsigned base = DECIMAL_SYSTEM); +const mp_integer +string2integer(const std::string &, unsigned base = DECIMAL_SYSTEM); + const std::string integer2binary(const mp_integer &, std::size_t width); const mp_integer binary2integer(const std::string &, bool is_signed); diff --git a/src/util/string2int.h b/src/util/string2int.h index 12c0260aaac..cd4f0459f09 100644 --- a/src/util/string2int.h +++ b/src/util/string2int.h @@ -6,28 +6,35 @@ Author: Michael Tautschnig, michael.tautschnig@cs.ox.ac.uk \*******************************************************************/ - #ifndef CPROVER_UTIL_STRING2INT_H #define CPROVER_UTIL_STRING2INT_H +#include "mp_arith.h" + #include // These check that the string is indeed a valid number, // and fail an assertion otherwise. // We use those for data types that C++11's std::stoi etc. do not // cover. -unsigned safe_string2unsigned(const std::string &str, int base=10); -std::size_t safe_string2size_t(const std::string &str, int base=10); +unsigned +safe_string2unsigned(const std::string &str, int base = DECIMAL_SYSTEM); +std::size_t +safe_string2size_t(const std::string &str, int base = DECIMAL_SYSTEM); // The below mimic C's atoi/atol: any errors are silently ignored. // They are meant to replace atoi/atol. -int unsafe_string2int(const std::string &str, int base=10); -unsigned unsafe_string2unsigned(const std::string &str, int base=10); -std::size_t unsafe_string2size_t(const std::string &str, int base=10); +int unsafe_string2int(const std::string &str, int base = DECIMAL_SYSTEM); +unsigned +unsafe_string2unsigned(const std::string &str, int base = DECIMAL_SYSTEM); +std::size_t +unsafe_string2size_t(const std::string &str, int base = DECIMAL_SYSTEM); // Same for atoll -long long int unsafe_string2signedlonglong(const std::string &str, int base=10); +long long int +unsafe_string2signedlonglong(const std::string &str, int base = DECIMAL_SYSTEM); long long unsigned int unsafe_string2unsignedlonglong( - const std::string &str, int base=10); + const std::string &str, + int base = DECIMAL_SYSTEM); #endif // CPROVER_UTIL_STRING2INT_H From c457ae9062caf4e92f56cfb176829dd612081359 Mon Sep 17 00:00:00 2001 From: Malte Mues Date: Thu, 12 Jul 2018 11:05:14 -0400 Subject: [PATCH 4/7] Add the actual core dump analyzer This code takes a static symbol, zero initalizes the typet of the symbol and then fills it up with values if possible. Following the declared structure in the typet, the analyzer queries the gdb api for values of the attributes of typet. It the symbol is a primtive type, the analyzer directly queries for the value of the symbol. --- src/memory-analyzer/analyze_symbol.cpp | 476 ++++++++++++++++++ src/memory-analyzer/analyze_symbol.h | 114 +++++ src/memory-analyzer/memory_analyzer_main.cpp | 16 + .../memory_analyzer_parse_options.cpp | 98 ++++ .../memory_analyzer_parse_options.h | 38 ++ 5 files changed, 742 insertions(+) create mode 100644 src/memory-analyzer/analyze_symbol.cpp create mode 100644 src/memory-analyzer/analyze_symbol.h create mode 100644 src/memory-analyzer/memory_analyzer_main.cpp create mode 100644 src/memory-analyzer/memory_analyzer_parse_options.cpp create mode 100644 src/memory-analyzer/memory_analyzer_parse_options.h diff --git a/src/memory-analyzer/analyze_symbol.cpp b/src/memory-analyzer/analyze_symbol.cpp new file mode 100644 index 00000000000..0a0fd12cb0b --- /dev/null +++ b/src/memory-analyzer/analyze_symbol.cpp @@ -0,0 +1,476 @@ +// Copyrigth 2018 Author: Malte Mues +#ifdef __linux__ + +#include "analyze_symbol.h" +#include "gdb_api.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +symbol_analyzert::symbol_analyzert( + const symbol_tablet &st, + gdb_apit &gdb, + message_handlert &handler) + : messaget(handler), gdb_api(gdb), ns(st), c_converter(ns), id_counter(0) +{ +} + +void symbol_analyzert::analyse_symbol(const std::string &symbol_name) +{ + const symbolt symbol = ns.lookup(symbol_name); + const symbol_exprt symbol_expr(symbol.name, symbol.type); + + if(is_c_char_pointer(symbol.type)) + { + process_char_pointer(symbol_expr, symbol.location); + } + else if(is_void_pointer(symbol.type)) + { + const exprt target_expr = + fill_void_pointer_with_values(symbol_expr, symbol.location); + + add_assignment(symbol_expr, target_expr); + } + else if(is_pointer(symbol.type)) + { + const std::string memory_location = gdb_api.get_memory(symbol_name); + + if(memory_location != "0x0") + { + const exprt target_symbol = process_any_pointer_target( + symbol_expr, memory_location, symbol.location); + const address_of_exprt reference_to_target(target_symbol); + add_assignment(symbol_expr, reference_to_target); + } + else + { + const exprt null_ptr = zero_initializer(symbol.type, symbol.location, ns); + add_assignment(symbol_expr, null_ptr); + } + } + else + { + const typet target_type = ns.follow(symbol.type); + const exprt target_expr = fill_expr_with_values( + symbol_name, + zero_initializer(target_type, symbol.location, ns), + target_type, + symbol.location); + + if(is_struct(target_type)) + { + const struct_typet structt = to_struct_type(target_type); + + for(std::size_t i = 0; i < target_expr.operands().size(); ++i) + { + auto &member_declaration = structt.components()[i]; + if(!member_declaration.get_is_padding()) + { + const auto &operand = target_expr.operands()[i]; + + const symbol_exprt symbol_op( + id2string(symbol.name) + "." + + id2string(member_declaration.get(ID_name)), + operand.type()); + add_assignment(symbol_op, operand); + } + } + } + else + { + add_assignment(symbol_expr, target_expr); + } + + try + { + const std::string memory_location = gdb_api.get_memory("&" + symbol_name); + values[memory_location] = symbol_expr; + } + catch(gdb_inaccessible_memoryt e) + { + warning() << "Couldn't access memory location for " << symbol_name << "\n" + << "continue execution anyhow." + << "You might want to check results for correctness!" + << messaget::eom; + } + } + process_outstanding_assignments(); +} + +std::string symbol_analyzert::get_code() +{ + return c_converter.convert(generated_code); +} + +void symbol_analyzert::add_assignment( + const symbol_exprt &symbol, + const exprt &value) +{ + generated_code.add(code_assignt(symbol, value)); +} + +void symbol_analyzert::process_char_pointer( + const symbol_exprt &symbol, + const source_locationt &location) +{ + const std::string memory_location = + gdb_api.get_memory(id2string(symbol.get_identifier())); + const exprt target_object = + declare_and_initalize_char_ptr(symbol, memory_location, location); + + add_assignment(symbol, target_object); +} + +array_exprt create_char_array_from_string( + std::string &values, + const bitvector_typet &type, + const source_locationt &location, + const namespacet &ns) +{ + const std::regex single_char_pattern( + "(?:(?:(?:\\\\)?([0-9]{3}))|([\\\\]{2}|\\S))"); + std::smatch result; + std::vector array_content; + size_t content_size = type.get_width(); + while(regex_search(values, result, single_char_pattern)) + { + if(!result[1].str().empty()) + { + // Char are octal encoded in gdb output. + mp_integer int_value = string2integer(result[1], OCTAL_SYSTEM); + array_content.push_back(integer2binary(int_value, content_size)); + } + else if(!result[2].str().empty()) + { + char token = result[2].str()[0]; + array_content.push_back(integer2binary(token, content_size)); + } + else + { + throw symbol_analysis_exceptiont( + "It is not supposed that non group match"); + } + values = result.suffix().str(); + } + + exprt size = from_integer(array_content.size(), size_type()); + array_typet new_array_type(type, size); + array_exprt result_array = + to_array_expr(zero_initializer(new_array_type, location, ns)); + + for(size_t i = 0; i < array_content.size(); ++i) + { + result_array.operands()[i].set(ID_value, array_content[i]); + } + + return result_array; +} + +exprt symbol_analyzert::declare_and_initalize_char_ptr( + const symbol_exprt &symbol, + const std::string &memory_location, + const source_locationt &location) +{ + const std::regex octal_encoding_pattern("(\\\\[0-9]{3})"); + + auto it = values.find(memory_location); + if(it == values.end()) + { + std::string value = gdb_api.get_value(id2string(symbol.get_identifier())); + + exprt init = string_constantt(value); + + if(regex_search(value, octal_encoding_pattern)) + { + bitvector_typet bv_type = to_bitvector_type(symbol.type().subtype()); + init = create_char_array_from_string(value, bv_type, location, ns); + } + + code_declt target_object = declare_instance(init.type()); + target_object.operands().resize(2); + target_object.op1() = init; + generated_code.add(target_object); + const address_of_exprt deref_pointer(target_object.symbol()); + const exprt casted_if_needed = + typecast_exprt::conditional_cast(deref_pointer, symbol.type()); + values[memory_location] = casted_if_needed; + return casted_if_needed; + } + else + { + return it->second; + } +} + +code_declt symbol_analyzert::declare_instance(const typet &type) +{ + const std::string var_id = "id_" + std::to_string(id_counter); + ++id_counter; + const code_declt declaration(symbol_exprt(var_id, type)); + return declaration; +} + +exprt symbol_analyzert::process_any_pointer_target( + const symbol_exprt &symbol, + const std::string memory_location, + const source_locationt &location) +{ + auto it = values.find(memory_location); + if(it == values.end()) + { + const place_holder_exprt recursion_breaker(memory_location); + values[memory_location] = recursion_breaker; + + const typet target_type = ns.follow(symbol.type().subtype()); + const code_declt target_object = get_pointer_target( + id2string(symbol.get_identifier()), target_type, location); + generated_code.add(target_object); + values[memory_location] = target_object.symbol(); + return target_object.symbol(); + } + else + { + return it->second; + } +} + +code_declt symbol_analyzert::get_pointer_target( + const std::string pointer_name, + const typet &type, + const source_locationt &location) +{ + code_declt declaration = declare_instance(type); + + const std::string deref_pointer = "(*" + pointer_name + ")"; + const exprt target_expr = fill_expr_with_values( + deref_pointer, zero_initializer(type, location, ns), type, location); + + declaration.operands().resize(2); + declaration.op1() = target_expr; + return declaration; +} + +exprt symbol_analyzert::fill_array_with_values( + const std::string &symbol_id, + exprt &array, + const typet &type, + const source_locationt &location) +{ + for(size_t i = 0; i < array.operands().size(); ++i) + { + const std::string cell_access = symbol_id + "[" + std::to_string(i) + "]"; + const typet target_type = ns.follow(array.operands()[i].type()); + if(is_c_char_pointer(target_type)) + { + const std::string memory_location = gdb_api.get_memory(cell_access); + const symbol_exprt char_ptr(cell_access, target_type); + array.operands()[i] = + declare_and_initalize_char_ptr(char_ptr, memory_location, location); + } + else + { + array.operands()[i] = fill_expr_with_values( + cell_access, array.operands()[i], target_type, location); + } + } + return array; +} + +exprt symbol_analyzert::fill_expr_with_values( + const std::string &symbol_id, + exprt expr, + const typet &type, + const source_locationt &location) +{ + if(expr.is_constant() && (is_c_int_derivate(type) || is_c_char(type))) + { + const std::string value = gdb_api.get_value(symbol_id); + const mp_integer int_rep = string2integer(value, DECIMAL_SYSTEM); + return from_integer(int_rep, type); + } + else if(expr.is_constant() && is_c_bool(type)) + { + const std::string value = gdb_api.get_value(symbol_id); + return from_c_boolean_value(value, type); + } + else if(expr.is_constant() && is_c_enum(expr.type())) + { + const std::string value = gdb_api.get_value(symbol_id); + return convert_memeber_name_to_enum_value( + value, to_c_enum_type(expr.type())); + } + else if(is_struct(type)) + { + return fill_struct_with_values(symbol_id, expr, type, location); + } + else if(is_array(type)) + { + expr = fill_array_with_values(symbol_id, expr, type, location); + } + else + { + throw symbol_analysis_exceptiont( + "unexpected thing: " + expr.pretty() + "\nexception end\n"); + } + return expr; +} + +exprt symbol_analyzert::fill_char_struct_member_with_values( + const symbol_exprt &field_access, + const exprt &default_expr, + const source_locationt &location) +{ + const std::string memory_location = + gdb_api.get_memory(id2string(field_access.get_identifier())); + if(memory_location != "0x0") + { + return declare_and_initalize_char_ptr( + field_access, memory_location, location); + } + return default_expr; +} + +exprt symbol_analyzert::fill_struct_with_values( + const std::string &symbol_id, + exprt &expr, + const typet &type, + const source_locationt &location) +{ + const struct_typet structt = to_struct_type(type); + for(size_t i = 0; i < expr.operands().size(); ++i) + { + exprt &operand = expr.operands()[i]; + const typet &resolved_type = ns.follow(operand.type()); + if(!structt.components()[i].get_is_padding()) + { + std::string field_access = + symbol_id + "." + id2string(structt.components()[i].get_name()); + symbol_exprt field_symbol(field_access, resolved_type); + + if(is_c_char_pointer(resolved_type)) + { + operand = + fill_char_struct_member_with_values(field_symbol, operand, location); + } + else if(is_void_pointer(resolved_type)) + { + operand = fill_void_pointer_with_values(field_symbol, location); + } + else if(is_struct(resolved_type)) + { + code_declt declaration = declare_instance(resolved_type); + + const exprt target_expr = fill_expr_with_values( + field_access, + zero_initializer(resolved_type, location, ns), + resolved_type, + location); + + declaration.operands().resize(2); + declaration.op1() = target_expr; + generated_code.add(declaration); + operand = declaration.symbol(); + } + else if(is_pointer(resolved_type)) + { + const std::string memory_location = gdb_api.get_memory(field_access); + + if(memory_location != "0x0") + { + const exprt target_sym = + process_any_pointer_target(field_symbol, memory_location, location); + + if(target_sym.id() == ID_skip_initialize) + { + outstanding_assigns[field_symbol] = + id2string(target_sym.get(ID_identifier)); + } + else + { + operand = address_of_exprt(target_sym); + } + } + } + else if(is_array(resolved_type)) + { + operand = fill_array_with_values( + field_access, operand, resolved_type, location); + } + else + { + const std::string value = gdb_api.get_value(field_access); + + if(is_c_int_derivate(resolved_type)) + { + const mp_integer int_rep = string2integer(value, DECIMAL_SYSTEM); + const constant_exprt constant = from_integer(int_rep, operand.type()); + expr.operands()[i] = constant; + } + } + } + } + return expr; +} + +exprt symbol_analyzert::fill_void_pointer_with_values( + const symbol_exprt &pointer_symbol, + const source_locationt &location) +{ + const std::string &pointer_name = id2string(pointer_symbol.get_identifier()); + const std::string &char_casted = "(char *)" + pointer_name; + exprt zero_ptr = zero_initializer(pointer_symbol.type(), location, ns); + + try + { + const typet &new_char_pointer = pointer_type(char_type()); + const symbol_exprt char_access_symbol(char_casted, new_char_pointer); + + const std::string memory_location = gdb_api.get_memory(pointer_name); + + if(memory_location == "0x0") + { + return zero_ptr; + } + return declare_and_initalize_char_ptr( + char_access_symbol, memory_location, location); + } + catch(gdb_inaccessible_memoryt) + { + // It is not directly what is supposed to happen, + // but doesn't corupt gdbs process state. So it's save to continue. + // Anyhow, you should inspect the result and value of field_access + // manually before using the state for verifcation. + // Field access stays zero initalized. + warning() << "Could not deal with void pointer: " << pointer_name + << "\nThe value is potential unsafe and need review\n" + << messaget::eom; + } + + return zero_ptr; +} + +void symbol_analyzert::process_outstanding_assignments() +{ + for(auto it = outstanding_assigns.begin(); it != outstanding_assigns.end(); + ++it) + { + const address_of_exprt reference_to_target(values[it->second]); + generated_code.add(code_assignt(it->first, reference_to_target)); + } +} +#endif diff --git a/src/memory-analyzer/analyze_symbol.h b/src/memory-analyzer/analyze_symbol.h new file mode 100644 index 00000000000..394c481fec7 --- /dev/null +++ b/src/memory-analyzer/analyze_symbol.h @@ -0,0 +1,114 @@ +// Copyright 2018 Author: Malte Mues +#ifdef __linux__ +#ifndef CPROVER_MEMORY_ANALYZER_ANALYZE_SYMBOL_H +#define CPROVER_MEMORY_ANALYZER_ANALYZE_SYMBOL_H +#include +#include + +#include +#include +#include +#include + +class symbol_tablet; +class gdb_apit; +class exprt; +class source_locationt; + +class symbol_analyzert : public messaget +{ +public: + symbol_analyzert( + const symbol_tablet &st, + gdb_apit &gdb, + message_handlert &handler); + void analyse_symbol(const std::string &symbol); + std::string get_code(); + +private: + gdb_apit &gdb_api; + const namespacet ns; + expr2c_pretty_structt c_converter; + + code_blockt generated_code; + size_t id_counter; + std::map outstanding_assigns; + std::map values; + + void add_assignment(const symbol_exprt &symbol, const exprt &value); + std::map + get_value_for_struct(std::string variable, struct_typet structure); + exprt fill_array_with_values( + const std::string &symbol_id, + exprt &array, + const typet &type, + const source_locationt &location); + exprt fill_expr_with_values( + const std::string &symbol_id, + exprt expr, + const typet &type, + const source_locationt &location); + exprt fill_char_struct_member_with_values( + const symbol_exprt &field_acces, + const exprt &default_expr, + const source_locationt &location); + exprt fill_struct_with_values( + const std::string &symbol_id, + exprt &expr, + const typet &type, + const source_locationt &location); + exprt fill_void_pointer_with_values( + const symbol_exprt &symbol, + const source_locationt &location); + + exprt process_any_pointer_target( + const symbol_exprt &symbol, + const std::string memory_location, + const source_locationt &location); + code_declt get_pointer_target( + const std::string deref_pointer, + const typet &type, + const source_locationt &location); + + code_declt declare_instance(const typet &type); + exprt declare_and_initalize_char_ptr( + const symbol_exprt &symbol, + const std::string &memory_location, + const source_locationt &location); + void process_char_pointer( + const symbol_exprt &symbol, + const source_locationt &location); + + void process_outstanding_assignments(); +}; + +class symbol_analysis_exceptiont : public std::exception +{ +public: + explicit symbol_analysis_exceptiont(std::string reason) : std::exception() + { + error = reason; + } + const char *what() const throw() + { + return error.c_str(); + } + +private: + std::string error; +}; + +class place_holder_exprt : public exprt +{ +public: + place_holder_exprt() : exprt(ID_skip_initialize) + { + } + explicit place_holder_exprt(const irep_idt &identifier) + : exprt(ID_skip_initialize) + { + set(ID_identifier, identifier); + } +}; +#endif // CPROVER_MEMORY_ANALYZER_ANALYZE_SYMBOL_H +#endif diff --git a/src/memory-analyzer/memory_analyzer_main.cpp b/src/memory-analyzer/memory_analyzer_main.cpp new file mode 100644 index 00000000000..ae57e4c9af2 --- /dev/null +++ b/src/memory-analyzer/memory_analyzer_main.cpp @@ -0,0 +1,16 @@ +// Copyright 2018 Author: Malte Mues +#ifdef __linux__ + +#include "memory_analyzer_parse_options.h" + +int main(int argc, const char **argv) +{ + memory_analyzer_parse_optionst parse_options(argc, argv); + return parse_options.main(); +} +#else +int main() +{ + throw "only supported on Linux platforms currently\n"; +} +#endif diff --git a/src/memory-analyzer/memory_analyzer_parse_options.cpp b/src/memory-analyzer/memory_analyzer_parse_options.cpp new file mode 100644 index 00000000000..ee556f4542d --- /dev/null +++ b/src/memory-analyzer/memory_analyzer_parse_options.cpp @@ -0,0 +1,98 @@ +// Copyright 2018 Author: Malte Mues + +/// \file +/// Commandline parser for the memory analyzer executing main work. + +#include "memory_analyzer_parse_options.h" +#include "analyze_symbol.h" +#include "gdb_api.h" + +#include +#include +#include +#include +#include +#include + +int memory_analyzer_parse_optionst::doit() +{ + // This script is the entry point and has to make sure + // that the config object is set to a valid architecture. + // config is later used to determine right size for types. + // If config is not set, you might see a bunch of types with + // size 0. + // It might be desierable to convert this into flags in the future. + config.ansi_c.mode = configt::ansi_ct::flavourt::GCC; + config.ansi_c.set_arch_spec_x86_64(); + + if(cmdline.args.size() != 1) + { + error() << "Please provide one binary with symbols to process" << eom; + return CPROVER_EXIT_USAGE_ERROR; + } + + std::string binary = cmdline.args[0]; + + gdb_apit gdb_api = gdb_apit(binary.c_str()); + gdb_api.create_gdb_process(); + + if(cmdline.isset("core-file")) + { + std::string core_file = cmdline.get_value("core-file"); + gdb_api.run_gdb_from_core(core_file); + } + else if(cmdline.isset("breakpoint")) + { + std::string breakpoint = cmdline.get_value("breakpoint"); + gdb_api.run_gdb_to_breakpoint(breakpoint); + } + else + { + error() << "Either the 'core-file' or 'breakpoint; flag must be provided\n" + << "Cannot execute memory-analyzer. Going to shut it down...\n" + << messaget::eom; + + gdb_api.terminate_gdb_process(); + + help(); + return CPROVER_EXIT_USAGE_ERROR; + } + + if(cmdline.isset("symbols")) + { + const std::string symbol_list(cmdline.get_value("symbols")); + std::vector result; + split_string(symbol_list, ',', result, false, false); + + goto_modelt goto_model; + read_goto_binary(binary, goto_model, ui_message_handler); + + symbol_analyzert analyzer( + goto_model.symbol_table, gdb_api, ui_message_handler); + + for(auto it = result.begin(); it != result.end(); it++) + { + messaget::result() << "analyzing symbol: " << (*it) << "\n"; + analyzer.analyse_symbol(*it); + } + + messaget::result() << "GENERATED CODE: \n" << messaget::eom; + messaget::result() << analyzer.get_code() << "\n" << messaget::eom; + + gdb_api.terminate_gdb_process(); + return CPROVER_EXIT_SUCCESS; + } + else + { + error() << "It is required to provide the symbols flag in order to make " + << "this tool work.\n" + << messaget::eom; + } + gdb_api.terminate_gdb_process(); + help(); + return CPROVER_EXIT_USAGE_ERROR; +} + +void memory_analyzer_parse_optionst::help() +{ +} diff --git a/src/memory-analyzer/memory_analyzer_parse_options.h b/src/memory-analyzer/memory_analyzer_parse_options.h new file mode 100644 index 00000000000..f467d962e27 --- /dev/null +++ b/src/memory-analyzer/memory_analyzer_parse_options.h @@ -0,0 +1,38 @@ +// Copyright 2018 Author: Malte Mues + +/// \file +/// This code does the command line parsing for the memory-analyzer tool + +#ifndef CPROVER_MEMORY_ANALYZER_MEMORY_ANALYZER_PARSE_OPTIONS_H +#define CPROVER_MEMORY_ANALYZER_MEMORY_ANALYZER_PARSE_OPTIONS_H + +#include +#include + +// clang-format off +#define MEMMORY_ANALYZER_OPTIONS \ + "(core-file):" \ + "(breakpoint):" \ + "(symbols):" + +// clang-format on + +class memory_analyzer_parse_optionst : public parse_options_baset, + public messaget +{ +public: + int doit() override; + void help() override; + + memory_analyzer_parse_optionst(int argc, const char **argv) + : parse_options_baset(MEMMORY_ANALYZER_OPTIONS, argc, argv), + messaget(ui_message_handler), + ui_message_handler(cmdline, "memory-analyzer") + { + } + +protected: + ui_message_handlert ui_message_handler; +}; + +#endif // CPROVER_MEMORY_ANALYZER_MEMORY_ANALYZER_PARSE_OPTIONS_H From fc7046cb592fbc643d07f16babd09cf0e5d5e7b1 Mon Sep 17 00:00:00 2001 From: Malte Mues Date: Thu, 12 Jul 2018 20:10:51 -0400 Subject: [PATCH 5/7] Add regression tests for the memory analyzer These test have been used as driving example for the memory analyzer. They test basic functionality and handling of cycles in structs. --- regression/memory-analyzer/arrays/arrays.c | 82 +++++++++++++++++++ regression/memory-analyzer/arrays/arrays.h | 32 ++++++++ regression/memory-analyzer/arrays/test.desc | 20 +++++ regression/memory-analyzer/compile_example.sh | 10 +++ regression/memory-analyzer/cycles/cycles.c | 61 ++++++++++++++ regression/memory-analyzer/cycles/cycles.h | 27 ++++++ regression/memory-analyzer/cycles/test.desc | 20 +++++ .../primitive_types/primitive_types.c | 42 ++++++++++ .../primitive_types/primitive_types.h | 28 +++++++ .../memory-analyzer/primitive_types/test.desc | 27 ++++++ .../memory-analyzer/void_pointer/test.desc | 16 ++++ .../void_pointer/void_pointer.c | 18 ++++ .../void_pointer/void_pointer.h | 15 ++++ 13 files changed, 398 insertions(+) create mode 100644 regression/memory-analyzer/arrays/arrays.c create mode 100644 regression/memory-analyzer/arrays/arrays.h create mode 100644 regression/memory-analyzer/arrays/test.desc create mode 100755 regression/memory-analyzer/compile_example.sh create mode 100644 regression/memory-analyzer/cycles/cycles.c create mode 100644 regression/memory-analyzer/cycles/cycles.h create mode 100644 regression/memory-analyzer/cycles/test.desc create mode 100644 regression/memory-analyzer/primitive_types/primitive_types.c create mode 100644 regression/memory-analyzer/primitive_types/primitive_types.h create mode 100644 regression/memory-analyzer/primitive_types/test.desc create mode 100644 regression/memory-analyzer/void_pointer/test.desc create mode 100644 regression/memory-analyzer/void_pointer/void_pointer.c create mode 100644 regression/memory-analyzer/void_pointer/void_pointer.h diff --git a/regression/memory-analyzer/arrays/arrays.c b/regression/memory-analyzer/arrays/arrays.c new file mode 100644 index 00000000000..f122024a280 --- /dev/null +++ b/regression/memory-analyzer/arrays/arrays.c @@ -0,0 +1,82 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// This file test array support in the memory-analyzer. +/// A more detailed description of the test idea is in example3.h. +/// setup() prepares the data structure. +/// manipulate_data() is the hook used for the test, +/// where gdb reaches the breakpoint. +/// main() is just the required boilerplate around this methods, +/// to make the compiled result executable. + +#include "arrays.h" + +#include +#include +#include + +void setup() +{ + test_struct = malloc(sizeof(struct a_typet)); + for(int i = 0; i < 10; i++) + { + test_struct->config[i] = i + 10; + } + for(int i = 0; i < 10; i++) + { + test_struct->values[i] = 0xf3; + } + for(int i = 10; i < 20; i++) + { + test_struct->values[i] = 0x3f; + } + for(int i = 20; i < 30; i++) + { + test_struct->values[i] = 0x01; + } + for(int i = 30; i < 40; i++) + { + test_struct->values[i] = 0x01; + } + for(int i = 40; i < 50; i++) + { + test_struct->values[i] = 0xff; + } + for(int i = 50; i < 60; i++) + { + test_struct->values[i] = 0x00; + } + for(int i = 60; i < 70; i++) + { + test_struct->values[i] = 0xab; + } + messaget option1 = {.text = "accept"}; + for(int i = 0; i < 4; i++) + { + char *value = malloc(sizeof(char *)); + sprintf(value, "unique%i", i); + entryt your_options = { + .id = 1, .options[0] = option1, .options[1].text = value}; + your_options.id = i + 12; + test_struct->childs[i].id = your_options.id; + test_struct->childs[i].options[0] = your_options.options[0]; + test_struct->childs[i].options[1].text = your_options.options[1].text; + } + test_struct->initalized = true; +} + +int manipulate_data() +{ + for(int i = 0; i < 4; i++) + { + free(test_struct->childs[i].options[1].text); + test_struct->childs[i].options[1].text = "decline"; + } +} + +int main() +{ + setup(); + manipulate_data(); + return 0; +} diff --git a/regression/memory-analyzer/arrays/arrays.h b/regression/memory-analyzer/arrays/arrays.h new file mode 100644 index 00000000000..cb760ce8ec5 --- /dev/null +++ b/regression/memory-analyzer/arrays/arrays.h @@ -0,0 +1,32 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// Example3 test explicitly the array support. +/// It ensures, that arrays are handled right. +/// The different typedefs have been used, to make sure the memory-analyzer +/// is aware of the different appeareances in the gdb responses. + +#include + +struct a_sub_sub_typet +{ + char *text; +}; + +typedef struct a_sub_sub_typet messaget; + +struct a_sub_typet +{ + int id; + messaget options[2]; +}; + +struct a_typet +{ + int config[10]; + bool initalized; + unsigned char values[70]; + struct a_sub_typet childs[4]; +} * test_struct; + +typedef struct a_sub_typet entryt; diff --git a/regression/memory-analyzer/arrays/test.desc b/regression/memory-analyzer/arrays/test.desc new file mode 100644 index 00000000000..58eecec3aa4 --- /dev/null +++ b/regression/memory-analyzer/arrays/test.desc @@ -0,0 +1,20 @@ +CORE +arrays.exe +--breakpoint manipulate_data --symbols test_struct +analyzing symbol: test_struct +GENERATED CODE: +\{ + char id_1\[7l\]="accept"; + char id_2\[8l\]="unique0"; + char id_3\[8l\]="unique1"; + char id_4\[8l\]="unique2"; + char id_5\[8l\]="unique3"; + struct a_typet id_0=\{ .config=\{ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 \}, .initalized=false, + .values=\{ 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171 \}, .childs=\{ \{ .id=12, .options=\{ \{ .text=\(char \*\)&id_1 \}, \{ .text=\(char \*\)&id_2 \} \} \}, + \{ .id=13, .options=\{ \{ .text=\(char \*\)&id_1 \}, \{ .text=\(char \*\)&id_3 \} \} \}, + \{ .id=14, .options=\{ \{ .text=\(char \*\)&id_1 \}, \{ .text=\(char \*\)&id_4 \} \} \}, + \{ .id=15, .options=\{ \{ .text=\(char \*\)&id_1 \}, \{ .text=\(char \*\)&id_5 \} \} \} \} \}; + test_struct = &id_0; +\} +-- +-- diff --git a/regression/memory-analyzer/compile_example.sh b/regression/memory-analyzer/compile_example.sh new file mode 100755 index 00000000000..a7c9f17006b --- /dev/null +++ b/regression/memory-analyzer/compile_example.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +MEMORYANALYZER="../../../src/memory-analyzer/memory-analyzer" + +set -e + +NAME=${5%.exe} + +goto-gcc -g -o $NAME.exe $NAME.c +$MEMORYANALYZER $@ diff --git a/regression/memory-analyzer/cycles/cycles.c b/regression/memory-analyzer/cycles/cycles.c new file mode 100644 index 00000000000..fd35e04bc23 --- /dev/null +++ b/regression/memory-analyzer/cycles/cycles.c @@ -0,0 +1,61 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// This example deals with cyclic data structures, +/// see comment in example2.h explaining why this is necessary. +/// add_element is just declared as a helper method for cycle construction. +/// process_buffer isn't supposed to do meaningfull stuff. +/// It is the hook for the gdb breakpoint used in the test. +/// free_buffer just does clean up, if you run this. + +#include "cycles.h" +void add_element(char *content) +{ + cycle_buffer_entry_t *new_entry = malloc(sizeof(cycle_buffer_entry_t)); + new_entry->data = content; + if(buffer.end && buffer.start) + { + new_entry->next = buffer.start; + buffer.end->next = new_entry; + buffer.end = new_entry; + } + else + { + new_entry->next = new_entry; + buffer.end = new_entry; + buffer.start = new_entry; + } +} + +int process_buffer() +{ + return 0; +} + +void free_buffer() +{ + cycle_buffer_entry_t *current; + cycle_buffer_entry_t *free_next; + if(buffer.start) + { + current = buffer.start->next; + while(current != buffer.start) + { + free_next = current; + current = current->next; + free(free_next); + } + free(current); + } +} + +int main() +{ + add_element("snow"); + add_element("sun"); + add_element("rain"); + add_element("thunder storm"); + + process_buffer(); + free_buffer(); +} diff --git a/regression/memory-analyzer/cycles/cycles.h b/regression/memory-analyzer/cycles/cycles.h new file mode 100644 index 00000000000..0c96abc9760 --- /dev/null +++ b/regression/memory-analyzer/cycles/cycles.h @@ -0,0 +1,27 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// Example2 deals with cycles in datastructures. +/// +/// While it is common that some datastructures contain cylces, +/// it is necessary that the memory-analyzer does recognize them. +/// Otherwise it would follow the datastructures pointer establishing +/// the cycle for ever and never terminate execution. +/// The cycle_buffer described below contains a cycle. +/// As long as this regression test works, cycle introduced by using pointers +/// are handle the correct way. + +#include +typedef struct cycle_buffer_entry +{ + char *data; + struct cycle_buffer_entry *next; +} cycle_buffer_entry_t; + +struct cycle_buffer +{ + cycle_buffer_entry_t *start; + struct cycle_buffer_entry *end; +}; + +struct cycle_buffer buffer; diff --git a/regression/memory-analyzer/cycles/test.desc b/regression/memory-analyzer/cycles/test.desc new file mode 100644 index 00000000000..438e56143b1 --- /dev/null +++ b/regression/memory-analyzer/cycles/test.desc @@ -0,0 +1,20 @@ +CORE +cycles.exe +--breakpoint process_buffer --symbols buffer +analyzing symbol: buffer +GENERATED CODE: +\{ + char id_1\[5l\]="snow"; + char id_3\[4l\]="sun"; + char id_5\[5l\]="rain"; + char id_7\[14l\]="thunder storm"; + struct cycle_buffer_entry id_6=\{ .data=\(char \*\)&id_7, .next=\(\(struct cycle_buffer_entry \*\)NULL\) \}; + struct cycle_buffer_entry id_4=\{ .data=\(char \*\)&id_5, .next=&id_6 \}; + struct cycle_buffer_entry id_2=\{ .data=\(char \*\)&id_3, .next=&id_4 \}; + struct cycle_buffer_entry id_0=\{ .data=\(char \*\)&id_1, .next=&id_2 \}; + buffer.start = &id_0; + buffer.end = &id_6; + \(\*\(\*\(\*\(\*buffer.start\).next\).next\).next\).next = &id_0; +\} +-- +-- diff --git a/regression/memory-analyzer/primitive_types/primitive_types.c b/regression/memory-analyzer/primitive_types/primitive_types.c new file mode 100644 index 00000000000..32d86bc53aa --- /dev/null +++ b/regression/memory-analyzer/primitive_types/primitive_types.c @@ -0,0 +1,42 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// This is just a basic example. +/// Pointer references are tested and ensured, that for example f and f_1 are +/// pointing to the same int value location after running memory-analyzer. + +#include "primitive_types.h" +int my_function(char *s) +{ + int a = 10; + g->a = a; + g->b = s; + return 0; +} + +int main(int argc, char **argv) +{ + char *test; + + e = 17; + + f = malloc(sizeof(int)); + f = &e; + f_1 = f; + + h = "test"; + + g = malloc(sizeof(struct mapping_things)); + d.a = 4; + d.c = -32; + test = "test2"; + + d.b = test; + + my_function(test); + + free(g); + free(f); + + return 0; +} diff --git a/regression/memory-analyzer/primitive_types/primitive_types.h b/regression/memory-analyzer/primitive_types/primitive_types.h new file mode 100644 index 00000000000..cc1e44b6370 --- /dev/null +++ b/regression/memory-analyzer/primitive_types/primitive_types.h @@ -0,0 +1,28 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// Example1 is just demonstrating, that the tool works in general. +/// A small struct and a few primitive pointers are declared in the global +/// namespace. These are assigned with values before calling my_function +/// and then, it is tested that this global state can be reconstructed before +/// calling my_function. As long as this example work basic functionallity is +/// provided. + +#include + +struct mapping_things +{ + int a; + char *b; + int c; +}; + +typedef struct mapping_things other_things; + +other_things d; +int e; +int *f; +int *f_1; +struct mapping_things *g; +char *h; +int my_function(char *s); diff --git a/regression/memory-analyzer/primitive_types/test.desc b/regression/memory-analyzer/primitive_types/test.desc new file mode 100644 index 00000000000..bccfda3147c --- /dev/null +++ b/regression/memory-analyzer/primitive_types/test.desc @@ -0,0 +1,27 @@ +CORE +primitive_types.exe +--breakpoint my_function --symbols e,f,f_1,d,g,h +analyzing symbol: e +analyzing symbol: f +analyzing symbol: f_1 +analyzing symbol: d +analyzing symbol: g +analyzing symbol: h +GENERATED CODE: +\{ + e = 17; + f = &e; + f_1 = &e; + char id_0\[6l\]="test2"; + d.a = 4; + d.b = \(char \*\)&id_0; + d.c = -32; + struct mapping_things id_1=\{ .a=0, .b=\(\(char \*\)NULL\), .c=0 \}; + g = &id_1; + char id_2\[5l\]="test"; + h = \(char \*\)&id_2; +} +^EXIT=0 +^SIGNAL=0 +-- +-- diff --git a/regression/memory-analyzer/void_pointer/test.desc b/regression/memory-analyzer/void_pointer/test.desc new file mode 100644 index 00000000000..dc49d59d0a8 --- /dev/null +++ b/regression/memory-analyzer/void_pointer/test.desc @@ -0,0 +1,16 @@ +CORE +void_pointer.exe +--breakpoint void_pointer.c:17 --symbols a_void_pointer,a_second_void_pointer,a_third_void_pointer,blob +analyzing symbol: blob +GENERATED CODE: +\{ + a_void_pointer = NULL; + char id_0\[12l\]="test_string"; + a_second_void_pointer = \(char \*\)&id_0; + char id_1\[7ul\]=\{ -13, -13, 'H', -1, '\\\\', '\\\\', -1 \}; + a_third_void_pointer = \(char \*\)&id_1; + blob.size = 7; + blob.data = \(char \*\)&id_1; +\} +-- +-- diff --git a/regression/memory-analyzer/void_pointer/void_pointer.c b/regression/memory-analyzer/void_pointer/void_pointer.c new file mode 100644 index 00000000000..88224166336 --- /dev/null +++ b/regression/memory-analyzer/void_pointer/void_pointer.c @@ -0,0 +1,18 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// This file initializes some void pointer in different styles. +/// Later the memory-analyzer tries to reconstruct the content. + +#include "void_pointer.h" +int main() +{ + char *char_pointer = "test_string"; + a_second_void_pointer = char_pointer; + char bytes[] = {0xf3, 0xf3, 0x48, 0xff, 0x5c, 0x5c, 0xff}; + a_third_void_pointer = &bytes; + + blob.data = bytes; + blob.size = sizeof(bytes); + return 0; +} diff --git a/regression/memory-analyzer/void_pointer/void_pointer.h b/regression/memory-analyzer/void_pointer/void_pointer.h new file mode 100644 index 00000000000..5744b2c8d1e --- /dev/null +++ b/regression/memory-analyzer/void_pointer/void_pointer.h @@ -0,0 +1,15 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// Example4 test the handling of void pointer. +/// The memory-analyzer tries to cast them to char and read some of the data. + +struct a_struct_with_void +{ + int size; + void *data; +} blob; + +void *a_void_pointer; +void *a_second_void_pointer; +void *a_third_void_pointer; From 25b7766dfbdc31e7c567e3b2712544aa1d24c801 Mon Sep 17 00:00:00 2001 From: Malte Mues Date: Thu, 12 Jul 2018 20:12:16 -0400 Subject: [PATCH 6/7] Add Makefiles enabeling memory-analyzer and tests --- regression/memory-analyzer/Makefile | 21 ++++++++++++ src/CMakeLists.txt | 1 + src/Makefile | 3 ++ src/memory-analyzer/Makefile | 37 +++++++++++++++++++++ src/memory-analyzer/module_dependencies.txt | 2 ++ unit/Makefile | 4 ++- unit/module_dependencies.txt | 1 + 7 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 regression/memory-analyzer/Makefile create mode 100644 src/memory-analyzer/Makefile create mode 100644 src/memory-analyzer/module_dependencies.txt diff --git a/regression/memory-analyzer/Makefile b/regression/memory-analyzer/Makefile new file mode 100644 index 00000000000..e6ccfe714c8 --- /dev/null +++ b/regression/memory-analyzer/Makefile @@ -0,0 +1,21 @@ +default: clean tests.log + +clean: + find -name '*.exe' -execdir $(RM) '{}' \; + find -name '*.out' -execdir $(RM) '{}' \; + $(RM) tests.log + +test: + -@ln -s goto-cc ../../src/goto-cc/goto-gcc + @../test.pl -p -c ../compile_example.sh + +tests.log: ../test.pl + -@ln -s goto-cc ../../src/goto-cc/goto-gcc + @../test.pl -p -c ../compile_example.sh + +show: + @for dir in *; do \ + if [ -d "$$dir" ]; then \ + vim -o "$$dir/*.c" "$$dir/*.out"; \ + fi; \ + done; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c07a24b125..c70d8e99852 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -91,6 +91,7 @@ add_subdirectory(jsil) add_subdirectory(json) add_subdirectory(langapi) add_subdirectory(linking) +add_subdirectory(memory-analyzer) add_subdirectory(memory-models) add_subdirectory(pointer-analysis) add_subdirectory(solvers) diff --git a/src/Makefile b/src/Makefile index 7220b6fcb79..4f8bd146dee 100644 --- a/src/Makefile +++ b/src/Makefile @@ -27,6 +27,7 @@ all: cbmc.dir \ goto-cc.dir \ goto-diff.dir \ goto-instrument.dir \ + memory-analyzer.dir\ # Empty last line ############################################################################### @@ -64,6 +65,8 @@ goto-diff.dir: languages goto-programs.dir pointer-analysis.dir \ goto-cc.dir: languages pointer-analysis.dir goto-programs.dir linking.dir +memory-analyzer.dir: util.dir goto-programs.dir + # building for a particular directory $(patsubst %, %.dir, $(DIRS)): diff --git a/src/memory-analyzer/Makefile b/src/memory-analyzer/Makefile new file mode 100644 index 00000000000..d5d940bb8a8 --- /dev/null +++ b/src/memory-analyzer/Makefile @@ -0,0 +1,37 @@ +SRC = analyze_symbol.cpp\ + gdb_api.cpp \ + memory_analyzer_main.cpp \ + memory_analyzer_parse_options.cpp + # Empty last line + +INCLUDES= -I .. + +LIBS = \ + ../ansi-c/ansi-c.a \ + ../goto-programs/goto-programs.a \ + ../linking/linking.a \ + ../util/util.a \ + ../big-int/big-int.a \ + ../langapi/langapi.a + + + +CLEANFILES = memory-analyzer$(EXEEXT) + +include ../config.inc +include ../common + +all: memory-analyzer$(EXEEXT) + + + +############################################################################### + +memory-analyzer$(EXEEXT): $(OBJ) + $(LINKBIN) + + +.PHONY: memory-analyser-mac-signed + +memory-analyser-mac-signed: memory-analyzer$(EXEEXT) + codesign -v -s $(OSX_IDENTITY) memory-analyzer$(EXEEXT) diff --git a/src/memory-analyzer/module_dependencies.txt b/src/memory-analyzer/module_dependencies.txt new file mode 100644 index 00000000000..7436825e815 --- /dev/null +++ b/src/memory-analyzer/module_dependencies.txt @@ -0,0 +1,2 @@ +util +goto-programs \ No newline at end of file diff --git a/unit/Makefile b/unit/Makefile index 1e4dc900bdd..30dbf824d83 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -17,6 +17,7 @@ SRC += unit_tests.cpp \ analyses/does_remove_const/is_type_at_least_as_const_as.cpp \ goto-programs/goto_trace_output.cpp \ path_strategies.cpp \ + memory-analyzer/gdb_api.cpp \ solvers/floatbv/float_utils.cpp \ solvers/refinement/array_pool/array_pool.cpp \ solvers/refinement/string_constraint_generator_valueof/calculate_max_string_length.cpp \ @@ -97,7 +98,8 @@ TESTS = unit_tests$(EXEEXT) \ miniBDD$(EXEEXT) \ # Empty last line -CLEANFILES = $(TESTS) +CLEANFILES = $(TESTS)\ + #Empty last line all: cprover.dir testing-utils.dir $(MAKE) $(MAKEARGS) $(TESTS) diff --git a/unit/module_dependencies.txt b/unit/module_dependencies.txt index 1640922e72d..9b9375c73bf 100644 --- a/unit/module_dependencies.txt +++ b/unit/module_dependencies.txt @@ -5,6 +5,7 @@ goto-programs goto-symex json langapi # should go away +memory-analyzer solvers/flattening solvers/floatbv solvers/miniBDD From a455b5d850ee3a58964d4510dcd9cf3507bce7af Mon Sep 17 00:00:00 2001 From: Malte Mues Date: Mon, 30 Jul 2018 09:35:39 -0400 Subject: [PATCH 7/7] Update unit/Makefile for removing miniBDD.o on clean --- unit/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unit/Makefile b/unit/Makefile index 30dbf824d83..894d17d4191 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -98,7 +98,8 @@ TESTS = unit_tests$(EXEEXT) \ miniBDD$(EXEEXT) \ # Empty last line -CLEANFILES = $(TESTS)\ +CLEANFILES = $(TESTS) \ + miniBDD$(OBJEXT) \ #Empty last line all: cprover.dir testing-utils.dir