@@ -84,6 +84,11 @@ class ReaderBase {
8484 // / the data still exists in the buffered inputs.
8585 bool isRowGroupBuffered (int32_t rowGroupIndex) const ;
8686
/// Returns the TypeWithId to use for building column readers.
/// When 'fileColumnNamesReadAsLowerCase' is false, 'inputType' is returned
/// unchanged. Otherwise a new TypeWithId is returned whose ROW type uses the
/// names of 'rowTypePtr' converted to lower case (ASCII only) together with
/// the child types of 'rowTypePtr', and which carries over the children, id,
/// maxId and column of 'inputType'.
87+ static std::shared_ptr<const dwio::common::TypeWithId> createTypeWithId (
88+ const std::shared_ptr<const dwio::common::TypeWithId>& inputType,
89+ const RowTypePtr& rowTypePtr,
90+ bool fileColumnNamesReadAsLowerCase);
91+
8792 private:
8893 // Reads and parses file footer.
8994 void loadFileMetaData ();
@@ -589,6 +594,33 @@ std::shared_ptr<const RowType> ReaderBase::createRowType(
589594 std::move (childNames), std::move (childTypes));
590595}
591596
597+ std::shared_ptr<const dwio::common::TypeWithId> ReaderBase::createTypeWithId (
598+ const std::shared_ptr<const dwio::common::TypeWithId>& inputType,
599+ const RowTypePtr& rowTypePtr,
600+ bool fileColumnNamesReadAsLowerCase) {
601+ if (!fileColumnNamesReadAsLowerCase) {
602+ return inputType;
603+ }
604+ std::vector<std::string> names;
605+ names.reserve (rowTypePtr->names ().size ());
606+ std::vector<TypePtr> types = rowTypePtr->children ();
607+ for (const auto & name : rowTypePtr->names ()) {
608+ std::string childName = name;
609+ folly::toLowerAscii (childName);
610+ names.emplace_back (childName);
611+ }
612+ auto convertedType =
613+ TypeFactory<TypeKind::ROW>::create (std::move (names), std::move (types));
614+
615+ auto children = inputType->getChildren ();
616+ return std::make_shared<const dwio::common::TypeWithId>(
617+ convertedType,
618+ std::move (children),
619+ inputType->id (),
620+ inputType->maxId (),
621+ inputType->column ());
622+ }
623+
592624void ReaderBase::scheduleRowGroups (
593625 const std::vector<uint32_t >& rowGroupIds,
594626 int32_t currentGroup,
@@ -662,13 +694,19 @@ class ParquetRowReader::Impl {
662694 }
663695 ParquetParams params (
664696 pool_, columnReaderStats_, readerBase_->fileMetaData ());
665- auto columnSelector = std::make_shared<ColumnSelector>(
666- ColumnSelector::apply (options_.getSelector (), readerBase_->schema ()));
697+ auto columnSelector = options_.getSelector ()
698+ ? options_.getSelector ()
699+ : std::make_shared<ColumnSelector>(ColumnSelector::apply (
700+ options_.getSelector (), readerBase_->schema ()));
667701 columnReader_ = ParquetColumnReader::build (
668- columnSelector->getSchemaWithId (),
702+ ReaderBase::createTypeWithId (
703+ columnSelector->getSchemaWithId (),
704+ asRowType (columnSelector->getSchemaWithId ()->type ()),
705+ readerBase_->isFileColumnNamesReadAsLowerCase ()),
669706 readerBase_->schemaWithId (), // Id is schema id
670707 params,
671- *options_.getScanSpec ());
708+ *options_.getScanSpec (),
709+ pool_);
672710
673711 filterRowGroups ();
674712 if (!rowGroupIds_.empty ()) {
0 commit comments