@@ -84,6 +84,11 @@ class ReaderBase {
8484 // / the data still exists in the buffered inputs.
8585 bool isRowGroupBuffered (int32_t rowGroupIndex) const ;
8686
/// Returns 'inputType' unchanged when 'fileColumnNamesReadAsLowerCase' is
/// false. Otherwise returns a new TypeWithId whose type is a ROW built from
/// the ASCII-lowercased field names and the child types of 'rowTypePtr',
/// and whose children, id, maxId and column are taken from 'inputType'.
87+ static std::shared_ptr<const dwio::common::TypeWithId> createTypeWithId (
88+ const std::shared_ptr<const dwio::common::TypeWithId>& inputType,
89+ const RowTypePtr& rowTypePtr,
90+ bool fileColumnNamesReadAsLowerCase);
91+
8792 private:
8893 // Reads and parses file footer.
8994 void loadFileMetaData ();
@@ -564,6 +569,33 @@ std::shared_ptr<const RowType> ReaderBase::createRowType(
564569 std::move (childNames), std::move (childTypes));
565570}
566571
572+ std::shared_ptr<const dwio::common::TypeWithId> ReaderBase::createTypeWithId (
573+ const std::shared_ptr<const dwio::common::TypeWithId>& inputType,
574+ const RowTypePtr& rowTypePtr,
575+ bool fileColumnNamesReadAsLowerCase) {
576+ if (!fileColumnNamesReadAsLowerCase) {
577+ return inputType;
578+ }
579+ std::vector<std::string> names;
580+ names.reserve (rowTypePtr->names ().size ());
581+ std::vector<TypePtr> types = rowTypePtr->children ();
582+ for (const auto & name : rowTypePtr->names ()) {
583+ std::string childName = name;
584+ folly::toLowerAscii (childName);
585+ names.emplace_back (childName);
586+ }
587+ auto convertedType =
588+ TypeFactory<TypeKind::ROW>::create (std::move (names), std::move (types));
589+
590+ auto children = inputType->getChildren ();
591+ return std::make_shared<const dwio::common::TypeWithId>(
592+ convertedType,
593+ std::move (children),
594+ inputType->id (),
595+ inputType->maxId (),
596+ inputType->column ());
597+ }
598+
567599void ReaderBase::scheduleRowGroups (
568600 const std::vector<uint32_t >& rowGroupIds,
569601 int32_t currentGroup,
@@ -630,13 +662,19 @@ ParquetRowReader::ParquetRowReader(
630662 return ; // TODO
631663 }
632664 ParquetParams params (pool_, columnReaderStats_, readerBase_->fileMetaData ());
633- auto columnSelector = std::make_shared<ColumnSelector>(
634- ColumnSelector::apply (options_.getSelector (), readerBase_->schema ()));
665+ // ColumnSelector::apply does not work for schema pruning case.
666+ auto columnSelector = options_.getSelector () == nullptr
667+ ? std::make_shared<ColumnSelector>(ColumnSelector (readerBase_->schema ()))
668+ : options_.getSelector ();
635669 columnReader_ = ParquetColumnReader::build (
636- columnSelector->getSchemaWithId (),
670+ ReaderBase::createTypeWithId (
671+ columnSelector->getSchemaWithId (),
672+ asRowType (columnSelector->getSchemaWithId ()->type ()),
673+ readerBase_->isFileColumnNamesReadAsLowerCase ()),
637674 readerBase_->schemaWithId (), // Id is schema id
638675 params,
639- *options_.getScanSpec ());
676+ *options_.getScanSpec (),
677+ pool_);
640678
641679 filterRowGroups ();
642680 if (!rowGroupIds_.empty ()) {
0 commit comments