@@ -84,6 +84,11 @@ class ReaderBase {
8484 /// the data still exists in the buffered inputs.
8585 bool isRowGroupBuffered (int32_t rowGroupIndex) const ;
8686
87+ static std::shared_ptr<const dwio::common::TypeWithId> createTypeWithId (
88+ const std::shared_ptr<const dwio::common::TypeWithId>& inputType,
89+ const RowTypePtr& rowTypePtr,
90+ bool fileColumnNamesReadAsLowerCase);
91+
8792 private:
8893 // Reads and parses file footer.
8994 void loadFileMetaData ();
@@ -588,6 +593,33 @@ std::shared_ptr<const RowType> ReaderBase::createRowType(
588593 std::move (childNames), std::move (childTypes));
589594}
590595
596+ std::shared_ptr<const dwio::common::TypeWithId> ReaderBase::createTypeWithId (
597+ const std::shared_ptr<const dwio::common::TypeWithId>& inputType,
598+ const RowTypePtr& rowTypePtr,
599+ bool fileColumnNamesReadAsLowerCase) {
600+ if (!fileColumnNamesReadAsLowerCase) {
601+ return inputType;
602+ }
603+ std::vector<std::string> names;
604+ names.reserve (rowTypePtr->names ().size ());
605+ std::vector<TypePtr> types = rowTypePtr->children ();
606+ for (const auto & name : rowTypePtr->names ()) {
607+ std::string childName = name;
608+ folly::toLowerAscii (childName);
609+ names.emplace_back (childName);
610+ }
611+ auto convertedType =
612+ TypeFactory<TypeKind::ROW>::create (std::move (names), std::move (types));
613+
614+ auto children = inputType->getChildren ();
615+ return std::make_shared<const dwio::common::TypeWithId>(
616+ convertedType,
617+ std::move (children),
618+ inputType->id (),
619+ inputType->maxId (),
620+ inputType->column ());
621+ }
622+
591623void ReaderBase::scheduleRowGroups (
592624 const std::vector<uint32_t >& rowGroupIds,
593625 int32_t currentGroup,
@@ -661,13 +693,19 @@ class ParquetRowReader::Impl {
661693 }
662694 ParquetParams params (
663695 pool_, columnReaderStats_, readerBase_->fileMetaData ());
664- auto columnSelector = std::make_shared<ColumnSelector>(
665- ColumnSelector::apply (options_.getSelector (), readerBase_->schema ()));
696+ auto columnSelector = options_.getSelector ()
697+ ? options_.getSelector ()
698+ : std::make_shared<ColumnSelector>(ColumnSelector::apply (
699+ options_.getSelector (), readerBase_->schema ()));
666700 columnReader_ = ParquetColumnReader::build (
667- columnSelector->getSchemaWithId (),
701+ ReaderBase::createTypeWithId (
702+ columnSelector->getSchemaWithId (),
703+ asRowType (columnSelector->getSchemaWithId ()->type ()),
704+ readerBase_->isFileColumnNamesReadAsLowerCase ()),
668705 readerBase_->schemaWithId (), // Id is schema id
669706 params,
670- *options_.getScanSpec ());
707+ *options_.getScanSpec (),
708+ pool_);
671709
672710 filterRowGroups ();
673711 if (!rowGroupIds_.empty ()) {
0 commit comments