@@ -83,6 +83,11 @@ class ReaderBase {
8383 /// the data still exists in the buffered inputs.
8484 bool isRowGroupBuffered (int32_t rowGroupIndex) const ;
8585
/// Produces the TypeWithId handed to the column reader builder.
///
/// When 'fileColumnNamesReadAsLowerCase' is false, 'inputType' is returned
/// unchanged. Otherwise a new TypeWithId is created whose row type reuses the
/// child types of 'rowTypePtr' with each top-level field name converted via
/// folly::toLowerAscii; the children, id, maxId and column of 'inputType' are
/// carried over as-is.
///
/// NOTE(review): 'rowTypePtr' is dereferenced without a null check when the
/// flag is set - presumably callers always pass a ROW type; confirm.
86+ static std::shared_ptr<const dwio::common::TypeWithId> createTypeWithId (
87+ const std::shared_ptr<const dwio::common::TypeWithId>& inputType,
88+ const RowTypePtr& rowTypePtr,
89+ bool fileColumnNamesReadAsLowerCase);
90+
8691 private:
8792 // Reads and parses file footer.
8893 void loadFileMetaData ();
@@ -563,6 +568,33 @@ std::shared_ptr<const RowType> ReaderBase::createRowType(
563568 std::move (childNames), std::move (childTypes));
564569}
565570
571+ std::shared_ptr<const dwio::common::TypeWithId> ReaderBase::createTypeWithId (
572+ const std::shared_ptr<const dwio::common::TypeWithId>& inputType,
573+ const RowTypePtr& rowTypePtr,
574+ bool fileColumnNamesReadAsLowerCase) {
575+ if (!fileColumnNamesReadAsLowerCase) {
576+ return inputType;
577+ }
578+ std::vector<std::string> names;
579+ names.reserve (rowTypePtr->names ().size ());
580+ std::vector<TypePtr> types = rowTypePtr->children ();
581+ for (const auto & name : rowTypePtr->names ()) {
582+ std::string childName = name;
583+ folly::toLowerAscii (childName);
584+ names.emplace_back (childName);
585+ }
586+ auto convertedType =
587+ TypeFactory<TypeKind::ROW>::create (std::move (names), std::move (types));
588+
589+ auto children = inputType->getChildren ();
590+ return std::make_shared<const dwio::common::TypeWithId>(
591+ convertedType,
592+ std::move (children),
593+ inputType->id (),
594+ inputType->maxId (),
595+ inputType->column ());
596+ }
597+
566598void ReaderBase::scheduleRowGroups (
567599 const std::vector<uint32_t >& rowGroupIds,
568600 int32_t currentGroup,
@@ -637,13 +669,17 @@ ParquetRowReader::ParquetRowReader(
637669 return ; // TODO
638670 }
639671 ParquetParams params (pool_, columnReaderStats_, readerBase_->fileMetaData ());
640- auto columnSelector = std::make_shared<ColumnSelector>(
641- ColumnSelector::apply ( options_.getSelector (), readerBase_-> schema ()) );
672+ // ColumnSelector::apply does not work for schema pruning case.
673+ auto columnSelector = options_.getSelector ();
642674 columnReader_ = ParquetColumnReader::build (
643- columnSelector->getSchemaWithId (),
675+ ReaderBase::createTypeWithId (
676+ columnSelector->getSchemaWithId (),
677+ asRowType (options_.getSelector ()->getSchemaWithId ()->type ()),
678+ readerBase_->isFileColumnNamesReadAsLowerCase ()),
644679 readerBase_->schemaWithId (), // Id is schema id
645680 params,
646- *options_.getScanSpec ());
681+ *options_.getScanSpec (),
682+ pool_);
647683
648684 filterRowGroups ();
649685 if (!rowGroupIds_.empty ()) {
0 commit comments