@@ -23,21 +23,46 @@ StructColumnReader::StructColumnReader(
2323 const std::shared_ptr<const dwio::common::TypeWithId>& requestedType,
2424 const std::shared_ptr<const dwio::common::TypeWithId>& dataType,
2525 ParquetParams& params,
26- common::ScanSpec& scanSpec)
26+ common::ScanSpec& scanSpec,
27+ memory::MemoryPool& pool)
2728 : SelectiveStructColumnReader(requestedType, dataType, params, scanSpec) {
2829 auto & childSpecs = scanSpec_->stableChildren ();
30+ std::vector<int > missingFields;
2931 for (auto i = 0 ; i < childSpecs.size (); ++i) {
3032 auto childSpec = childSpecs[i];
3133 if (childSpecs[i]->isConstant ()) {
3234 continue ;
3335 }
34- auto childDataType = fileType_->childByName (childSpec->fieldName ());
35- auto childRequestedType =
36- requestedType_->childByName (childSpec->fieldName ());
36+ const auto & fieldName = childSpecs[i]->fieldName ();
37+ if (!fileType_->containsChild (fieldName)) {
38+ missingFields.emplace_back (i);
39+ continue ;
40+ }
41+ auto childDataType = fileType_->childByName (fieldName);
42+ auto childRequestedType = requestedType_->childByName (fieldName);
3743 addChild (ParquetColumnReader::build (
38- childRequestedType, childDataType, params, *childSpec));
44+ childRequestedType, childDataType, params, *childSpec, pool ));
3945 childSpecs[i]->setSubscript (children_.size () - 1 );
4046 }
47+
48+ if (missingFields.size () > 0 ) {
49+ // Set the struct as null if all the child fields in the output type are
50+ // missing and the number of child fields is more than one.
51+ if (childSpecs.size () > 1 && missingFields.size () == childSpecs.size ()) {
52+ scanSpec_->setConstantValue (
53+ BaseVector::createNullConstant (requestedType_->type (), 1 , &pool));
54+ } else {
55+ // Set a null constant for each missing child field of the output type.
56+ for (int channel : missingFields) {
57+ childSpecs[channel]->setConstantValue (BaseVector::createNullConstant (
58+ requestedType_->childByName (childSpecs[channel]->fieldName ())
59+ ->type (),
60+ 1 ,
61+ &pool));
62+ }
63+ }
64+ }
65+
4166 auto type = reinterpret_cast <const ParquetTypeWithId*>(fileType_.get ());
4267 if (type->parent ()) {
4368 levelMode_ = reinterpret_cast <const ParquetTypeWithId*>(fileType_.get ())
@@ -47,7 +72,10 @@ StructColumnReader::StructColumnReader(
4772 // this and the child.
4873 auto child = childForRepDefs_;
4974 for (;;) {
50- assert (child);
75+ if (child == nullptr ) {
76+ levelMode_ = LevelMode::kNulls ;
77+ break ;
78+ }
5179 if (child->fileType ().type ()->kind () == TypeKind::ARRAY ||
5280 child->fileType ().type ()->kind () == TypeKind::MAP) {
5381 levelMode_ = LevelMode::kStructOverLists ;
0 commit comments