-
-
Notifications
You must be signed in to change notification settings - Fork 144
[AVRO] #589: Fix schema not including base class for records with subclasses #593
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
03d344a
46cbe12
99311ae
8f09acc
c540e07
88fa1dc
792e0d2
a13d0d6
67c6042
41d3060
31ac1b7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,13 +26,46 @@ public class RecordVisitor | |
protected final VisitorFormatWrapperImpl _visitorWrapper; | ||
|
||
/** | ||
* Tracks if the schema for this record has been overridden (by an annotation or other means), and calls to the {@code property} and | ||
* {@code optionalProperty} methods should be ignored. | ||
* Tracks if the schema for this record has been overridden (by an annotation or other means), | ||
* and calls to the {@code property} and {@code optionalProperty} methods should be ignored. | ||
*/ | ||
protected final boolean _overridden; | ||
|
||
/** | ||
* When Avro schema for this JavaType ({@code _type}) results in UNION of multiple Avro types, | ||
* _typeSchema keeps track of which Avro type in the UNION represents this JavaType ({@code _type}) | ||
* so that fields of this JavaType can be set to the right Avro type by {@code builtAvroSchema()}. | ||
*<br> | ||
* Example: | ||
* <pre> | ||
* @JsonSubTypes({ | ||
* @JsonSubTypes.Type(value = Apple.class), | ||
* @JsonSubTypes.Type(value = Pear.class) }) | ||
* class Fruit {} | ||
* | ||
* class Apple extends Fruit {} | ||
* class Orange extends Fruit {} | ||
* </pre> | ||
* When {@code _type = Fruit.class} | ||
* Then | ||
* _avroSchema if Fruit.class is union of Fruit record, Apple record and Orange record schemas: [ | ||
* { name: Fruit, type: record, fields: [..] }, <--- _typeSchema points here | ||
* { name: Apple, type: record, fields: [..] }, | ||
* { name: Orange, type: record, fields: [..]} | ||
* ] | ||
* _typeSchema points to Fruit.class without subtypes record schema | ||
* | ||
* FIXME: When _typeSchema is not null, then _overridden must be true, therefore (_overridden == true) can be replaced with (_typeSchema != null), | ||
* but it might be considered API change cause _overridden has protected access modifier. | ||
* | ||
* @since 2.19.1 | ||
*/ | ||
private final Schema _typeSchema; | ||
|
||
// !!! 19-May-2025: TODO: make final in 2.20 | ||
protected Schema _avroSchema; | ||
|
||
// !!! 19-May-2025: TODO: make final in 2.20 | ||
protected List<Schema.Field> _fields = new ArrayList<>(); | ||
|
||
public RecordVisitor(SerializerProvider p, JavaType type, VisitorFormatWrapperImpl visitorWrapper) | ||
|
@@ -42,32 +75,62 @@ public RecordVisitor(SerializerProvider p, JavaType type, VisitorFormatWrapperIm | |
_visitorWrapper = visitorWrapper; | ||
// Check if the schema for this record is overridden | ||
BeanDescription bean = getProvider().getConfig().introspectDirectClassAnnotations(_type); | ||
List<NamedType> subTypes = getProvider().getAnnotationIntrospector().findSubtypes(bean.getClassInfo()); | ||
AvroSchema ann = bean.getClassInfo().getAnnotation(AvroSchema.class); | ||
if (ann != null) { | ||
_avroSchema = AvroSchemaHelper.parseJsonSchema(ann.value()); | ||
_overridden = true; | ||
} else if (subTypes != null && !subTypes.isEmpty()) { | ||
List<Schema> unionSchemas = new ArrayList<>(); | ||
try { | ||
for (NamedType subType : subTypes) { | ||
JsonSerializer<?> ser = getProvider().findValueSerializer(subType.getType()); | ||
VisitorFormatWrapperImpl visitor = _visitorWrapper.createChildWrapper(); | ||
ser.acceptJsonFormatVisitor(visitor, getProvider().getTypeFactory().constructType(subType.getType())); | ||
unionSchemas.add(visitor.getAvroSchema()); | ||
} | ||
_avroSchema = Schema.createUnion(unionSchemas); | ||
_overridden = true; | ||
} catch (JsonMappingException jme) { | ||
throw new RuntimeException("Failed to build schema", jme); | ||
} | ||
_typeSchema = null; | ||
} else { | ||
// If Avro schema for this _type results in UNION I want to know Avro type where to assign fields | ||
_avroSchema = AvroSchemaHelper.initializeRecordSchema(bean); | ||
_typeSchema = _avroSchema; | ||
_overridden = false; | ||
AvroMeta meta = bean.getClassInfo().getAnnotation(AvroMeta.class); | ||
if (meta != null) { | ||
_avroSchema.addProp(meta.key(), meta.value()); | ||
} | ||
|
||
List<NamedType> subTypes = getProvider().getAnnotationIntrospector().findSubtypes(bean.getClassInfo()); | ||
if (subTypes != null && !subTypes.isEmpty()) { | ||
// alreadySeenClasses prevents subType processing in endless loop | ||
Set<Class<?>> alreadySeenClasses = new HashSet<>(); | ||
alreadySeenClasses.add(_type.getRawClass()); | ||
|
||
// At this point calculating hashCode for _typeSchema fails with | ||
// NPE because RecordSchema.fields is NULL | ||
// (see org.apache.avro.Schema.RecordSchema#computeHash). | ||
// Therefore, unionSchemas must not be HashSet (or any other type | ||
// using hashCode() for equality check). | ||
// Set ensures that each subType schema is once in resulting union. | ||
// IdentityHashMap is used because it is using reference-equality. | ||
final Set<Schema> unionSchemas = Collections.newSetFromMap(new IdentityHashMap<>()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But if we have identity-based Making that chance will not fail any of added tests at least. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes. It happens when a class is references multiple times in hierarchy (maybe by mistake). In the example below Helium is subtype of Element and also subtype of Gas. Where Gas schema is union of
Element schema is union of all type in
When you collect all types togetjer without identity check you would get invalid union of
because Helium is twice. See use case 4 "Class referenced twice in @JsonSubTypes hierarchy**" above or Change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok yes, right, but: I think duplicates are actually caught by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Both duplicate checks are needed, each check is on different level. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But like I said, changing Set to List did not fail any of the tests. So something is odd. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. :( Hmmm, just tested:
Fails There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok I must have been hallucinating. Apologies for noise. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no problem, it happens. |
||
// Initialize with this schema | ||
if (_type.isConcrete()) { | ||
unionSchemas.add(_typeSchema); | ||
} | ||
|
||
try { | ||
for (NamedType subType : subTypes) { | ||
if (!alreadySeenClasses.add(subType.getType())) { | ||
continue; | ||
} | ||
JsonSerializer<?> ser = getProvider().findValueSerializer(subType.getType()); | ||
VisitorFormatWrapperImpl visitor = _visitorWrapper.createChildWrapper(); | ||
ser.acceptJsonFormatVisitor(visitor, getProvider().getTypeFactory().constructType(subType.getType())); | ||
// Add subType schema into this union, unless it is already there. | ||
Schema subTypeSchema = visitor.getAvroSchema(); | ||
// When subType schema is union itself, include each its type into this union if not there already | ||
if (subTypeSchema.getType() == Type.UNION) { | ||
unionSchemas.addAll(subTypeSchema.getTypes()); | ||
} else { | ||
unionSchemas.add(subTypeSchema); | ||
} | ||
} | ||
_avroSchema = Schema.createUnion(new ArrayList<>(unionSchemas)); | ||
} catch (JsonMappingException jme) { | ||
throw new RuntimeJsonMappingException("Failed to build schema", jme); | ||
} | ||
} | ||
} | ||
_visitorWrapper.getSchemas().addSchema(type, _avroSchema); | ||
} | ||
|
@@ -76,7 +139,7 @@ public RecordVisitor(SerializerProvider p, JavaType type, VisitorFormatWrapperIm | |
public Schema builtAvroSchema() { | ||
if (!_overridden) { | ||
// Assumption now is that we are done, so let's assign fields | ||
_avroSchema.setFields(_fields); | ||
_typeSchema.setFields(_fields); | ||
} | ||
return _avroSchema; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can do that as a follow-up for 2.20 (2.x branch)