diff --git a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/RecordVisitor.java b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/RecordVisitor.java index 30a00dea2..a28cebc37 100644 --- a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/RecordVisitor.java +++ b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/RecordVisitor.java @@ -26,13 +26,46 @@ public class RecordVisitor protected final VisitorFormatWrapperImpl _visitorWrapper; /** - * Tracks if the schema for this record has been overridden (by an annotation or other means), and calls to the {@code property} and - * {@code optionalProperty} methods should be ignored. + * Tracks if the schema for this record has been overridden (by an annotation or other means), + * and calls to the {@code property} and {@code optionalProperty} methods should be ignored. */ protected final boolean _overridden; + /** + * When Avro schema for this JavaType ({@code _type}) results in UNION of multiple Avro types, + * _typeSchema keeps track of which Avro type in the UNION represents this JavaType ({@code _type}) + * so that fields of this JavaType can be set to the right Avro type by {@code builtAvroSchema()}. + *
+ * Example: + *
+     *   @JsonSubTypes({
+     *     @JsonSubTypes.Type(value = Apple.class),
+     *     @JsonSubTypes.Type(value = Pear.class) })
+     *   class Fruit {}
+     *
+     *   class Apple extends Fruit {}
+     *   class Orange extends Fruit {}
+     * 
+ * When {@code _type = Fruit.class} + * Then + * _avroSchema if Fruit.class is union of Fruit record, Apple record and Orange record schemas: [ + * { name: Fruit, type: record, fields: [..] }, <--- _typeSchema points here + * { name: Apple, type: record, fields: [..] }, + * { name: Orange, type: record, fields: [..]} + * ] + * _typeSchema points to Fruit.class without subtypes record schema + * + * FIXME: When _typeSchema is not null, then _overridden must be true, therefore (_overridden == true) can be replaced with (_typeSchema != null), + * but it might be considered API change cause _overridden has protected access modifier. + * + * @since 2.19.1 + */ + private final Schema _typeSchema; + + // !!! 19-May-2025: TODO: make final in 2.20 protected Schema _avroSchema; + // !!! 19-May-2025: TODO: make final in 2.20 protected List _fields = new ArrayList<>(); public RecordVisitor(SerializerProvider p, JavaType type, VisitorFormatWrapperImpl visitorWrapper) @@ -42,32 +75,62 @@ public RecordVisitor(SerializerProvider p, JavaType type, VisitorFormatWrapperIm _visitorWrapper = visitorWrapper; // Check if the schema for this record is overridden BeanDescription bean = getProvider().getConfig().introspectDirectClassAnnotations(_type); - List subTypes = getProvider().getAnnotationIntrospector().findSubtypes(bean.getClassInfo()); AvroSchema ann = bean.getClassInfo().getAnnotation(AvroSchema.class); if (ann != null) { _avroSchema = AvroSchemaHelper.parseJsonSchema(ann.value()); _overridden = true; - } else if (subTypes != null && !subTypes.isEmpty()) { - List unionSchemas = new ArrayList<>(); - try { - for (NamedType subType : subTypes) { - JsonSerializer ser = getProvider().findValueSerializer(subType.getType()); - VisitorFormatWrapperImpl visitor = _visitorWrapper.createChildWrapper(); - ser.acceptJsonFormatVisitor(visitor, getProvider().getTypeFactory().constructType(subType.getType())); - unionSchemas.add(visitor.getAvroSchema()); - } - _avroSchema = Schema.createUnion(unionSchemas); - _overridden = true; - } catch (JsonMappingException jme) { - throw new RuntimeException("Failed to build schema", jme); - } + _typeSchema = null; } else { + // If Avro schema for this _type results in UNION I want to know Avro type where to assign fields _avroSchema = AvroSchemaHelper.initializeRecordSchema(bean); + _typeSchema = _avroSchema; _overridden = false; AvroMeta meta = bean.getClassInfo().getAnnotation(AvroMeta.class); if (meta != null) { _avroSchema.addProp(meta.key(), meta.value()); } + + List subTypes = getProvider().getAnnotationIntrospector().findSubtypes(bean.getClassInfo()); + if (subTypes != null && !subTypes.isEmpty()) { + // alreadySeenClasses prevents subType processing in endless loop + Set> alreadySeenClasses = new HashSet<>(); + alreadySeenClasses.add(_type.getRawClass()); + + // At this point calculating hashCode for _typeSchema fails with + // NPE because RecordSchema.fields is NULL + // (see org.apache.avro.Schema.RecordSchema#computeHash). + // Therefore, unionSchemas must not be HashSet (or any other type + // using hashCode() for equality check). + // Set ensures that each subType schema is once in resulting union. + // IdentityHashMap is used because it is using reference-equality. + final Set unionSchemas = Collections.newSetFromMap(new IdentityHashMap<>()); + // Initialize with this schema + if (_type.isConcrete()) { + unionSchemas.add(_typeSchema); + } + + try { + for (NamedType subType : subTypes) { + if (!alreadySeenClasses.add(subType.getType())) { + continue; + } + JsonSerializer ser = getProvider().findValueSerializer(subType.getType()); + VisitorFormatWrapperImpl visitor = _visitorWrapper.createChildWrapper(); + ser.acceptJsonFormatVisitor(visitor, getProvider().getTypeFactory().constructType(subType.getType())); + // Add subType schema into this union, unless it is already there. + Schema subTypeSchema = visitor.getAvroSchema(); + // When subType schema is union itself, include each its type into this union if not there already + if (subTypeSchema.getType() == Type.UNION) { + unionSchemas.addAll(subTypeSchema.getTypes()); + } else { + unionSchemas.add(subTypeSchema); + } + } + _avroSchema = Schema.createUnion(new ArrayList<>(unionSchemas)); + } catch (JsonMappingException jme) { + throw new RuntimeJsonMappingException("Failed to build schema", jme); + } + } } _visitorWrapper.getSchemas().addSchema(type, _avroSchema); } @@ -76,7 +139,7 @@ public RecordVisitor(SerializerProvider p, JavaType type, VisitorFormatWrapperIm public Schema builtAvroSchema() { if (!_overridden) { // Assumption now is that we are done, so let's assign fields - _avroSchema.setFields(_fields); + _typeSchema.setFields(_fields); } return _avroSchema; } diff --git a/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/PolymorphicTypeAnnotationsTest.java b/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/PolymorphicTypeAnnotationsTest.java new file mode 100644 index 000000000..f21675478 --- /dev/null +++ b/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/PolymorphicTypeAnnotationsTest.java @@ -0,0 +1,294 @@ +package com.fasterxml.jackson.dataformat.avro.schema; + +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.dataformat.avro.AvroMapper; +import com.fasterxml.jackson.dataformat.avro.annotation.AvroNamespace; + +import org.apache.avro.Schema; +import org.apache.avro.reflect.Union; + +import static org.assertj.core.api.Assertions.assertThat; + +public class PolymorphicTypeAnnotationsTest { + + private static final AvroMapper MAPPER = AvroMapper.builder().build(); + // it is easier maintain string schema representation when namespace is constant, rather than being inferred from this class package name + private static final String TEST_NAMESPACE = "test"; + + @JsonSubTypes({ + @JsonSubTypes.Type(value = Cat.class), + @JsonSubTypes.Type(value = Dog.class), + }) + interface AnimalInterface { + } + + static abstract class AbstractMammal implements AnimalInterface { + public int legs; + } + + static class Cat extends AbstractMammal { + public String color; + } + + static class Dog extends AbstractMammal { + public int size; + } + + @Test + public void subclasses_of_interface_test() throws Exception { + // GIVEN + final Schema catSchema = MAPPER.schemaFor(Cat.class).getAvroSchema(); + final Schema dogSchema = MAPPER.schemaFor(Dog.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(AnimalInterface.class).getAvroSchema(); + + // System.out.println("Animal schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + // Because AnimalInterface is interface and AbstractMammal is abstract, they are not expected to be among types in union + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(catSchema, dogSchema); + } + + @JsonSubTypes({ + @JsonSubTypes.Type(value = Apple.class), + @JsonSubTypes.Type(value = Pear.class), + }) + @AvroNamespace(TEST_NAMESPACE) // @AvroNamespace makes it easier to create schema string representation + static class Fruit { + public boolean eatable; + } + + private static final String FRUIT_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"Fruit\",\"namespace\":\"test\",\"fields\":[{\"name\":\"eatable\",\"type\":\"boolean\"}]}"; + + static class Apple extends Fruit { + public String color; + } + + static class Pear extends Fruit { + public int seeds; + } + + @Test + public void jsonSubTypes_on_concrete_class_test() throws Exception { + // GIVEN + final Schema fruitItselfSchema = MAPPER.schemaFrom(FRUIT_ITSELF_SCHEMA_STR).getAvroSchema(); + final Schema appleSchema = MAPPER.schemaFor(Apple.class).getAvroSchema(); + final Schema pearSchema = MAPPER.schemaFor(Pear.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(Fruit.class).getAvroSchema(); + + // System.out.println("Fruit schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(fruitItselfSchema, appleSchema, pearSchema); + } + + @JsonSubTypes({ + @JsonSubTypes.Type(value = LandVehicle.class), + @JsonSubTypes.Type(value = AbstractWaterVehicle.class), + }) + @AvroNamespace(TEST_NAMESPACE) + static class Vehicle { + } + + private static final String VEHICLE_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"Vehicle\",\"namespace\":\"test\",\"fields\":[]}"; + + @JsonSubTypes({ + @JsonSubTypes.Type(value = Car.class), + @JsonSubTypes.Type(value = MotorCycle.class), + }) + @AvroNamespace(TEST_NAMESPACE) + static class LandVehicle extends Vehicle { + } + + private static final String LAND_VEHICLE_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"LandVehicle\",\"namespace\":\"test\",\"fields\":[]}"; + + static class Car extends LandVehicle { + } + + static class MotorCycle extends LandVehicle { + } + + @JsonSubTypes({ + @JsonSubTypes.Type(value = Boat.class), + @JsonSubTypes.Type(value = Submarine.class), + }) + static abstract class AbstractWaterVehicle extends Vehicle { + public int propellers; + } + + static class Boat extends AbstractWaterVehicle { + } + + static class Submarine extends AbstractWaterVehicle { + } + + @Test + public void jsonSubTypes_of_jsonSubTypes_test() throws Exception { + // GIVEN + final Schema vehicleItselfSchema = MAPPER.schemaFrom(VEHICLE_ITSELF_SCHEMA_STR).getAvroSchema(); + final Schema landVehicleItselfSchema = MAPPER.schemaFrom(LAND_VEHICLE_ITSELF_SCHEMA_STR).getAvroSchema(); + final Schema carSchema = MAPPER.schemaFor(Car.class).getAvroSchema(); + final Schema motorCycleSchema = MAPPER.schemaFor(MotorCycle.class).getAvroSchema(); + final Schema boatSchema = MAPPER.schemaFor(Boat.class).getAvroSchema(); + final Schema submarineSchema = MAPPER.schemaFor(Submarine.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(Vehicle.class).getAvroSchema(); + + // System.out.println("Vehicle schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder( + vehicleItselfSchema, + landVehicleItselfSchema, carSchema, motorCycleSchema, + // AbstractWaterVehicle is not here, because it is abstract + boatSchema, submarineSchema); + } + + // Helium is twice in subtypes hierarchy, once as ElementInterface subtype and second time as subtype + // of AbstractGas subtype. This situation may result in + // "Failed to generate `AvroSchema` for ...., problem: (AvroRuntimeException) Duplicate in union:com.fasterxml...PolymorphicTypeAnnotationsTest.Helium" + // error. + @JsonSubTypes({ + @JsonSubTypes.Type(value = AbstractGas.class), + @JsonSubTypes.Type(value = Helium.class), + }) + private interface ElementInterface { + } + + @JsonSubTypes({ + @JsonSubTypes.Type(value = Helium.class), + @JsonSubTypes.Type(value = Oxygen.class), + }) + static abstract class AbstractGas implements ElementInterface { + public int atomicMass; + } + + private static class Helium extends AbstractGas { + } + + private static class Oxygen extends AbstractGas { + } + + @Test + public void class_is_referenced_twice_in_hierarchy_test() throws Exception { + // GIVEN + final Schema heliumSchema = MAPPER.schemaFor(Helium.class).getAvroSchema(); + final Schema oxygenSchema = MAPPER.schemaFor(Oxygen.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(ElementInterface.class).getAvroSchema(); + + // System.out.println("ElementInterface schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + // ElementInterface and AbstractGas are not concrete classes they are not expected to be among types in union + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(heliumSchema, oxygenSchema); + } + + @JsonSubTypes({ + // Base class being explicitly in @JsonSubTypes led to StackOverflowError exception. + @JsonSubTypes.Type(value = Image.class), + @JsonSubTypes.Type(value = Jpeg.class), + @JsonSubTypes.Type(value = Png.class), + }) + @AvroNamespace(TEST_NAMESPACE) // @AvroNamespace makes it easier to create schema string representation + static class Image { + } + + private static final String IMAGE_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"Image\",\"namespace\":\"test\",\"fields\":[]}"; + + static class Jpeg extends Image { + } + + static class Png extends Image { + } + + @Test + public void base_class_explicitly_in_JsonSubTypes_annotation_test() throws Exception { + // GIVEN + final Schema imageItselfSchema = MAPPER.schemaFrom(IMAGE_ITSELF_SCHEMA_STR).getAvroSchema(); + final Schema jpegSchema = MAPPER.schemaFor(Jpeg.class).getAvroSchema(); + final Schema pngSchema = MAPPER.schemaFor(Png.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(Image.class).getAvroSchema(); + + // System.out.println("Image schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(imageItselfSchema, jpegSchema, pngSchema); + } + + @Union({ + // Base class being explicitly in @Union led to StackOverflowError exception. + Sport.class, + Football.class, Basketball.class}) + @AvroNamespace(TEST_NAMESPACE) // @AvroNamespace makes it easier to create schema string representation + static class Sport { + } + + private static final String SPORT_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"Sport\",\"namespace\":\"test\",\"fields\":[]}"; + + static class Football extends Sport { + } + + static class Basketball extends Sport { + } + + @Test + public void base_class_explicitly_in_Union_annotation_test() throws Exception { + // GIVEN + final Schema sportItselfSchema = MAPPER.schemaFrom(SPORT_ITSELF_SCHEMA_STR).getAvroSchema(); + final Schema footballSchema = MAPPER.schemaFor(Football.class).getAvroSchema(); + final Schema basketballSchema = MAPPER.schemaFor(Basketball.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(Sport.class).getAvroSchema(); + + //System.out.println("Sport schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(sportItselfSchema, footballSchema, basketballSchema); + } + + @Union({ + // Interface being explicitly in @Union led to StackOverflowError exception. + DocumentInterface.class, + Word.class, Excel.class}) + interface DocumentInterface { + } + + static class Word implements DocumentInterface { + } + + static class Excel implements DocumentInterface { + } + + @Test + public void interface_explicitly_in_Union_annotation_test() throws Exception { + // GIVEN + final Schema wordSchema = MAPPER.schemaFor(Word.class).getAvroSchema(); + final Schema excelSchema = MAPPER.schemaFor(Excel.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(DocumentInterface.class).getAvroSchema(); + + //System.out.println("Document schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(wordSchema, excelSchema); + } +} diff --git a/release-notes/CREDITS-2.x b/release-notes/CREDITS-2.x index 8dec9da09..b94303190 100644 --- a/release-notes/CREDITS-2.x +++ b/release-notes/CREDITS-2.x @@ -232,6 +232,9 @@ Michal Foksa (MichalFoksa@github) (2.19.0) * Contributed #536: (avro) Add Logical Type support for `java.util.UUID` (2.19.0) +* Contributed fix for #589: AvroSchema: Does not include base class for records + with subclasses + (2.19.1) Hunter Herman (hherman1@github) @@ -385,3 +388,9 @@ Manuel Sugawara (@sugmanue) Josh Curry (@seadbrane) * Reported, contributed fix for #571: Unable to deserialize a pojo with IonStruct (2.19.0) + +Rafael Winterhalter (@raphw) + * Reported #589: AvroSchema: Does not include base class for records + with subclasses + (2.19.1) + diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x index 033fd9423..bae9b68aa 100644 --- a/release-notes/VERSION-2.x +++ b/release-notes/VERSION-2.x @@ -18,6 +18,12 @@ Active maintainers: - +2.19.1 (not yet released) + +#589: AvroSchema: Does not include base class for records with subclasses + (reported by Rafael W) + (fix contributed by Michal F) + 2.19.0 (24-Apr-2025) #300: (smile) Floats are encoded with sign extension while doubles without