@@ -30,21 +30,46 @@ StructColumnReader::StructColumnReader(
3030 const std::shared_ptr<const dwio::common::TypeWithId>& requestedType,
3131 const std::shared_ptr<const dwio::common::TypeWithId>& fileType,
3232 ParquetParams& params,
33- common::ScanSpec& scanSpec)
33+ common::ScanSpec& scanSpec,
34+ memory::MemoryPool& pool)
3435 : SelectiveStructColumnReader(requestedType, fileType, params, scanSpec) {
3536 auto & childSpecs = scanSpec_->stableChildren ();
37+ std::vector<int > missingFields;
3638 for (auto i = 0 ; i < childSpecs.size (); ++i) {
3739 auto childSpec = childSpecs[i];
3840 if (childSpecs[i]->isConstant ()) {
3941 continue ;
4042 }
41- auto childFileType = fileType_->childByName (childSpec->fieldName ());
42- auto childRequestedType =
43- requestedType_->childByName (childSpec->fieldName ());
43+ const auto & fieldName = childSpec->fieldName ();
44+ if (!fileType_->containsChild (fieldName)) {
45+ missingFields.emplace_back (i);
46+ continue ;
47+ }
48+ auto childFileType = fileType_->childByName (fieldName);
49+ auto childRequestedType = requestedType_->childByName (fieldName);
4450 addChild (ParquetColumnReader::build (
45- childRequestedType, childFileType, params, *childSpec));
51+ childRequestedType, childFileType, params, *childSpec, pool ));
4652 childSpecs[i]->setSubscript (children_.size () - 1 );
4753 }
54+
55+ if (missingFields.size () > 0 ) {
56+ // Set the struct to null if all the child fields in the output type are
57+ // missing and the number of child fields is more than one.
58+ if (childSpecs.size () > 1 && missingFields.size () == childSpecs.size ()) {
59+ scanSpec_->setConstantValue (
60+ BaseVector::createNullConstant (requestedType_->type (), 1 , &pool));
61+ } else {
62+ // Set a null constant for each missing child field of the output type.
63+ for (int channel : missingFields) {
64+ childSpecs[channel]->setConstantValue (BaseVector::createNullConstant (
65+ requestedType_->childByName (childSpecs[channel]->fieldName ())
66+ ->type (),
67+ 1 ,
68+ &pool));
69+ }
70+ }
71+ }
72+
4873 auto type = reinterpret_cast <const ParquetTypeWithId*>(fileType_.get ());
4974 if (type->parent ()) {
5075 levelMode_ = reinterpret_cast <const ParquetTypeWithId*>(fileType_.get ())
@@ -54,7 +79,10 @@ StructColumnReader::StructColumnReader(
5479 // this and the child.
5580 auto child = childForRepDefs_;
5681 for (;;) {
57- assert (child);
82+ if (child == nullptr ) {
83+ levelMode_ = LevelMode::kNulls ;
84+ break ;
85+ }
5886 if (child->fileType ().type ()->kind () == TypeKind::ARRAY ||
5987 child->fileType ().type ()->kind () == TypeKind::MAP) {
6088 levelMode_ = LevelMode::kStructOverLists ;
@@ -91,7 +119,6 @@ StructColumnReader::findBestLeaf() {
91119 best = child;
92120 }
93121 }
94- assert (best);
95122 return best;
96123}
97124
0 commit comments