From b61cb5612e41fb98e4b99e0e25bfbb3e54fcd29a Mon Sep 17 00:00:00 2001 From: svorenova Date: Fri, 13 Oct 2017 16:36:09 +0100 Subject: [PATCH 1/2] Resolving signature/descriptor mismatch for methods We want to use signature whenever possible. In certain cases, the number of input parameters in the descriptor is higher than in the signature. For example for enums and inner classes, in these cases descriptor has the correct form (although without generics information). --- .../java_bytecode_convert_class.cpp | 3 +- .../java_bytecode_convert_method.cpp | 75 ++++++++++++++++++- .../java_bytecode_convert_method.h | 3 +- 3 files changed, 77 insertions(+), 4 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 9270e1fa0cd..0b0b71a1d09 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -203,7 +203,8 @@ void java_bytecode_convert_classt::convert(const classt &c) *class_symbol, method_identifier, method, - symbol_table); + symbol_table, + get_message_handler()); lazy_methods[method_identifier]=std::make_pair(class_symbol, &method); } diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index c342ff0f766..d9cbab10fed 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -248,6 +248,69 @@ const exprt java_bytecode_convert_methodt::variable( } } +/// Returns the member type for a method, based on signature or descriptor +/// \param descriptor +/// descriptor of the method +/// \param signature +/// signature of the method +/// \param class_name +/// string containing the name of the corresponding class +/// \param method_name +/// string containing the name of the method +/// \param message_handler +/// message handler to collect warnings +/// \return +/// the constructed member type +typet member_type_lazy(const std::string &descriptor, + const optionalt &signature, + const std::string &class_name, + const std::string &method_name, + message_handlert &message_handler) +{ + // In order to construct the method type, we can either use signature or + // descriptor. Since only signature contains the generics info, we want to + // use signature whenever possible. We revert to using descriptor if (1) the + // signature does not exist, (2) an unsupported generics are present or + // (3) in the special case when the number of parameters in the descriptor + // does not match the number of parameters in the signature - e.g. for + // certain types of inner classes and enums (see unit tests for examples). + + messaget message(message_handler); + + typet member_type_from_descriptor=java_type_from_string(descriptor); + INVARIANT(member_type_from_descriptor.id()==ID_code, "Must be code type"); + if(signature.has_value()) + { + try + { + typet member_type_from_signature=java_type_from_string( + signature.value(), + class_name); + INVARIANT(member_type_from_signature.id()==ID_code, "Must be code type"); + if(to_code_type(member_type_from_signature).parameters().size()== + to_code_type(member_type_from_descriptor).parameters().size()) + { + return member_type_from_signature; + } + else + { + message.warning() << "method: " << class_name << "." << method_name + << "\n signature: " << signature.value() << "\n descriptor: " + << descriptor << "\n different number of parameters, reverting to " + "descriptor" << message.eom; + } + } + catch(unsupported_java_class_signature_exceptiont &e) + { + message.warning() << "method: " << class_name << "." << method_name + << "\n could not parse signature: " << signature.value() << "\n " + << e.what() << "\n" << " reverting to descriptor: " + << descriptor << message.eom; + } + } + return member_type_from_descriptor; +} + /// This creates a method symbol in the symtab, but doesn't actually perform /// method conversion just yet. The caller should call /// java_bytecode_convert_method later to give the symbol/method a body. @@ -256,14 +319,22 @@ const exprt java_bytecode_convert_methodt::variable( /// (e.g. "x.y.z.f:(I)") /// `m`: parsed method object to convert /// `symbol_table`: global symbol table (will be modified) +/// `message_handler`: message handler to collect warnings void java_bytecode_convert_method_lazy( const symbolt &class_symbol, const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &m, - symbol_tablet &symbol_table) + symbol_tablet &symbol_table, + message_handlert &message_handler) { symbolt method_symbol; - typet member_type=java_type_from_string(m.descriptor); + + typet member_type=member_type_lazy( + m.descriptor, + m.signature, + id2string(class_symbol.name), + id2string(m.base_name), + message_handler); method_symbol.name=method_identifier; method_symbol.base_name=m.base_name; diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 6aa68810e60..6fd97a4b2d3 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -53,6 +53,7 @@ void java_bytecode_convert_method_lazy( const symbolt &class_symbol, const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &, - symbol_tablet &symbol_table); + symbol_tablet &symbol_table, + message_handlert &); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_METHOD_H From 09431fd41629f600a274c9ff2385ec4b73073f23 Mon Sep 17 00:00:00 2001 From: svorenova Date: Mon, 16 Oct 2017 16:16:26 +0100 Subject: [PATCH 2/2] Adding unit tests for the signature/descripture mismatch --- .../AbstractGenericClass.class | Bin 0 -> 421 bytes .../AbstractGenericClass.java | 4 + .../Outer$Inner.class | Bin 0 -> 618 bytes .../Outer$InnerEnum.class | Bin 0 -> 848 bytes .../java_bytecode_parse_generics/Outer.class | Bin 0 -> 458 bytes .../java_bytecode_parse_generics/Outer.java | 17 ++++ .../parse_derived_generic_class.cpp | 9 +- .../parse_generic_class.cpp | 87 +++++++++--------- .../parse_signature_descriptor_mismatch.cpp | 80 ++++++++++++++++ unit/testing-utils/require_symbol.cpp | 29 ++++++ unit/testing-utils/require_symbol.h | 25 +++++ 11 files changed, 199 insertions(+), 52 deletions(-) create mode 100644 unit/java_bytecode/java_bytecode_parse_generics/AbstractGenericClass.class create mode 100644 unit/java_bytecode/java_bytecode_parse_generics/AbstractGenericClass.java create mode 100644 unit/java_bytecode/java_bytecode_parse_generics/Outer$Inner.class create mode 100644 unit/java_bytecode/java_bytecode_parse_generics/Outer$InnerEnum.class create mode 100644 unit/java_bytecode/java_bytecode_parse_generics/Outer.class create mode 100644 unit/java_bytecode/java_bytecode_parse_generics/Outer.java create mode 100644 unit/java_bytecode/java_bytecode_parse_generics/parse_signature_descriptor_mismatch.cpp create mode 100644 unit/testing-utils/require_symbol.cpp create mode 100644 unit/testing-utils/require_symbol.h diff --git a/unit/java_bytecode/java_bytecode_parse_generics/AbstractGenericClass.class b/unit/java_bytecode/java_bytecode_parse_generics/AbstractGenericClass.class new file mode 100644 index 0000000000000000000000000000000000000000..cbfa2cb7b184b90686c08afef32c7fde510da40c GIT binary patch literal 421 zcmZvYPfNov7{;Hh|6HwZ&LM~=Z_|Ujc(SE126`!aSlK>{s-FnN!AP}$t&PrBE`T7rd@RbFdS7d-)BXbta-T4P%P+ z7jji{-6-liiPD>7&0qPn;N@!ikgY|o!}vd9@E291a&arodr!NbbY(6IV8dY;cNH)U z|Ms%a(W4JDgVCfcsY`Cwdce8-0DC83!6S7Eo6 +{ + +} diff --git a/unit/java_bytecode/java_bytecode_parse_generics/Outer$Inner.class b/unit/java_bytecode/java_bytecode_parse_generics/Outer$Inner.class new file mode 100644 index 0000000000000000000000000000000000000000..bab6c799f45418d3dca7282efc6e07c1e7d33ce1 GIT binary patch literal 618 zcmZuuO;5r=5Pe%dN=x|=6+ci!<$!3wgEt6?A<>u+IY78Cb%Q0fn9`c~vpf(J5B>mu zlyPmwmQU229M6XddZ;o`e7h^3AG*12|5gw+7er<&^jA2 z#JjTxnvv@{p12R6#=;-+u}esyH?z4r;=UvAOE4~nF(mFha?4<;s5}dTGtsoFLdgdU z5+AiD&{(Z3u>7Lb95_=yRwwnH#JeNDMRHL1KnnY^UY0Z)H{7E3=flMOJkkfD_$HeB{ zrl8uok&4oNhLt%A$E^VaPd`Oe`_ddWyC02|OtXa8t)>Q7yFr{2ZWiRUSKV6#`{a$& zpJYi8zKwLhj`8lL?$kw9U&4Zm7ibE!$e#aRQS{JW<3*ju*=4 z4bQ?LCDpRwV%RG-Z7#7>fM5GO%icM7MK49rqhYnWE|XRqHe4)%N}=5b&H8h`f_+En z0@i2RQ42IyOl*&t>56^z%7VHg=kK~z*+nYaKKvT@}Qq9K}?Ok}}?eZdAxhGOQ&#K&@F;=%{; zp^SH`CZKJ4+w=AGw!I&pZ|?xkapb{9)5X4v0~d#c%B|Kixz_oD;G6Xz7HKL|7cGKg z2%+3lT4jBL-R_JDrNQ!E5^AB+@-AOYWD2l6ma+$x65Mc@XEF&0 z&2`uFtv-Q)fIwcSX{lL5qD0;vC8VoXuvsr1-&q^;d3lA qF07jzeY1A3g&M2+8rf!LqruU_4tCAF$EU%}LWzfS*@DaS2COfuw_wBo literal 0 HcmV?d00001 diff --git a/unit/java_bytecode/java_bytecode_parse_generics/Outer.java b/unit/java_bytecode/java_bytecode_parse_generics/Outer.java new file mode 100644 index 00000000000..0360a6adf15 --- /dev/null +++ b/unit/java_bytecode/java_bytecode_parse_generics/Outer.java @@ -0,0 +1,17 @@ +public class Outer +{ + private class Inner + { + private final AbstractGenericClass u; + + Inner (AbstractGenericClass t) + { + this.u = t; + } + } + + private enum InnerEnum + { + + } +} diff --git a/unit/java_bytecode/java_bytecode_parse_generics/parse_derived_generic_class.cpp b/unit/java_bytecode/java_bytecode_parse_generics/parse_derived_generic_class.cpp index 0cab10c80e5..c0826e63cd0 100644 --- a/unit/java_bytecode/java_bytecode_parse_generics/parse_derived_generic_class.cpp +++ b/unit/java_bytecode/java_bytecode_parse_generics/parse_derived_generic_class.cpp @@ -17,6 +17,7 @@ #include #include #include +#include SCENARIO( "java_bytecode_parse_derived_generic_class", @@ -33,12 +34,8 @@ SCENARIO( REQUIRE(new_symbol_table.has_symbol(class_prefix)); const symbolt &derived_symbol=new_symbol_table.lookup_ref(class_prefix); - REQUIRE(derived_symbol.is_type); - const typet &derived_type=derived_symbol.type; - REQUIRE(derived_type.id()==ID_struct); - const class_typet &class_type=to_class_type(derived_type); - REQUIRE(class_type.is_class()); - REQUIRE_FALSE(class_type.get_bool(ID_incomplete_class)); + const class_typet &derived_class_type= + require_symbol::require_complete_class(derived_symbol); // TODO(tkiley): Currently we do not support extracting information // about the base classes generic information. diff --git a/unit/java_bytecode/java_bytecode_parse_generics/parse_generic_class.cpp b/unit/java_bytecode/java_bytecode_parse_generics/parse_generic_class.cpp index f49c9022887..ade836cb549 100644 --- a/unit/java_bytecode/java_bytecode_parse_generics/parse_generic_class.cpp +++ b/unit/java_bytecode/java_bytecode_parse_generics/parse_generic_class.cpp @@ -174,57 +174,52 @@ SCENARIO( .has_symbol("java::generics$bound_element.f:()Ljava/lang/Number;")); // TODO: methods should have generic return type (the tests needs to be -// extended), reintroduce when the issue of signature/descriptor for methods is -// resolved -// THEN("The method should have generic return type") -// { -// const symbolt &method_symbol= -// new_symbol_table -// .lookup("java::generics$bound_element.f:()Ljava/lang/Number;") -// .value().get(); -// const typet &symbol_type=method_symbol.type; -// -// REQUIRE(symbol_type.id()==ID_code); -// -// const code_typet &code=to_code_type(symbol_type); -// } +// extended) + THEN("The method should have generic return type") + { + const symbolt &method_symbol= + new_symbol_table + .lookup_ref("java::generics$bound_element.f:()Ljava/lang/Number;"); + const typet &symbol_type=method_symbol.type; + + REQUIRE(symbol_type.id()==ID_code); + + const code_typet &code=to_code_type(symbol_type); + } REQUIRE( new_symbol_table .has_symbol("java::generics$bound_element.g:(Ljava/lang/Number;)V")); + THEN("The method should have a generic parameter.") + { + const symbolt &method_symbol= + new_symbol_table + .lookup_ref("java::generics$bound_element.g:(Ljava/lang/Number;)V"); + const typet &symbol_type=method_symbol.type; -// TODO: methods are not recognized as generic, reintroduce when -// the issue of signature/descriptor for methods is resolved -// THEN("The method should have a generic parameter.") -// { -// const symbolt &method_symbol= -// new_symbol_table -// .lookup("java::generics$bound_element.g:(Ljava/lang/Number;)V"); -// const typet &symbol_type=method_symbol.type; -// -// REQUIRE(symbol_type.id()==ID_code); -// -// const code_typet &code=to_code_type(symbol_type); -// -// bool found=false; -// for(const auto &p : code.parameters()) -// { -// if(p.get_identifier()== -// "java::generics$bound_element.g:(Ljava/lang/Number;)V::e") -// { -// found=true; -// const typet &t=p.type(); -// REQUIRE(is_java_generic_parameter(p.type())); -// const java_generic_parametert &gen_type= -// to_java_generic_parameter(p.type()); -// const symbol_typet &type_var=gen_type.type_variable(); -// REQUIRE(type_var.get_identifier()== -// "java::generics$bound_element::NUM"); -// break; -// } -// } -// REQUIRE(found); -// } + REQUIRE(symbol_type.id()==ID_code); + + const code_typet &code=to_code_type(symbol_type); + + bool found=false; + for(const auto &p : code.parameters()) + { + if(p.get_identifier()== + "java::generics$bound_element.g:(Ljava/lang/Number;)V::e") + { + found=true; + const typet &t=p.type(); + REQUIRE(is_java_generic_parameter(p.type())); + const java_generic_parametert &gen_type= + to_java_generic_parameter(p.type()); + const symbol_typet &type_var=gen_type.type_variable(); + REQUIRE(type_var.get_identifier()== + "java::generics$bound_element::NUM"); + break; + } + } + REQUIRE(found); + } } } GIVEN("A class with multiple bounds") diff --git a/unit/java_bytecode/java_bytecode_parse_generics/parse_signature_descriptor_mismatch.cpp b/unit/java_bytecode/java_bytecode_parse_generics/parse_signature_descriptor_mismatch.cpp new file mode 100644 index 00000000000..40288e62cc4 --- /dev/null +++ b/unit/java_bytecode/java_bytecode_parse_generics/parse_signature_descriptor_mismatch.cpp @@ -0,0 +1,80 @@ +/*******************************************************************\ + + Module: Unit tests for parsing generic classes + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#include + +#include + +#include +#include +#include +#include +#include +#include + +SCENARIO( + "java_bytecode_parse_signature_descriptor_mismatch", + "[core][java_bytecode][java_bytecode_parse_generics]") +{ + const symbol_tablet &new_symbol_table= + load_java_class( + "Outer", + "./java_bytecode/java_bytecode_parse_generics"); + + const std::string class_prefix="java::Outer"; + REQUIRE(new_symbol_table.has_symbol(class_prefix)); + + const std::string inner_prefix=class_prefix+"$Inner"; + THEN("There is a (complete) symbol for the inner class Inner") + { + REQUIRE(new_symbol_table.has_symbol(inner_prefix)); + + const symbolt &inner_symbol=new_symbol_table.lookup_ref(inner_prefix); + const class_typet &inner_class_type= + require_symbol::require_complete_class(inner_symbol); + } + + THEN("There is a symbol for the constructor of the inner class with correct" + " descriptor") + { + const std::string func_name="."; + const std::string func_descriptor=":(LOuter;LAbstractGenericClass;)V"; + const std::string process_func_name= + inner_prefix+func_name+func_descriptor; + REQUIRE(new_symbol_table.has_symbol(process_func_name)); + + const code_typet func_code= + to_code_type(new_symbol_table.lookup_ref(process_func_name).type); + REQUIRE(func_code.parameters().size()==3); + } + + const std::string inner_enum_prefix=class_prefix+"$InnerEnum"; + THEN("There is a (complete) symbol for the inner enum InnerEnum") + { + REQUIRE(new_symbol_table.has_symbol(inner_enum_prefix)); + + const symbolt &inner_enum_symbol= + new_symbol_table.lookup_ref(inner_enum_prefix); + const class_typet &inner_enum_class_type= + require_symbol::require_complete_class(inner_enum_symbol); + } + + THEN("There is a symbol for the constructor of the inner enum with correct" + " number of parameters") + { + const std::string func_name="."; + const std::string func_descriptor=":(Ljava/lang/String;I)V"; + const std::string process_func_name= + inner_enum_prefix+func_name+func_descriptor; + REQUIRE(new_symbol_table.has_symbol(process_func_name)); + + const code_typet func_code= + to_code_type(new_symbol_table.lookup_ref(process_func_name).type); + REQUIRE(func_code.parameters().size()==3); + } +} diff --git a/unit/testing-utils/require_symbol.cpp b/unit/testing-utils/require_symbol.cpp new file mode 100644 index 00000000000..e291017c5cc --- /dev/null +++ b/unit/testing-utils/require_symbol.cpp @@ -0,0 +1,29 @@ +/*******************************************************************\ + + Module: Unit test utilities + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +/// \file +/// Helper functions for requiring properties of symbols + +#include "require_symbol.h" + +#include + +const class_typet &require_symbol::require_complete_class( + const symbolt &class_symbol) +{ + REQUIRE(class_symbol.is_type); + + const typet &class_symbol_type=class_symbol.type; + REQUIRE(class_symbol_type.id()==ID_struct); + + const class_typet &class_class_type=to_class_type(class_symbol_type); + REQUIRE(class_class_type.is_class()); + REQUIRE_FALSE(class_class_type.get_bool(ID_incomplete_class)); + + return class_class_type; +} diff --git a/unit/testing-utils/require_symbol.h b/unit/testing-utils/require_symbol.h new file mode 100644 index 00000000000..c8d1e636e65 --- /dev/null +++ b/unit/testing-utils/require_symbol.h @@ -0,0 +1,25 @@ +/*******************************************************************\ + + Module: Unit test utilities + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +/// \file +/// Helper functions for requiring properties of symbols + +#include +#include + +#ifndef CPROVER_TESTING_UTILS_REQUIRE_SYMBOL_H +#define CPROVER_TESTING_UTILS_REQUIRE_SYMBOL_H + +// NOLINTNEXTLINE(readability/namespace) +namespace require_symbol +{ + const class_typet &require_complete_class( + const symbolt &class_symbol); +} + +#endif // CPROVER_TESTING_UTILS_REQUIRE_SYMBOL_H