@@ -85,6 +85,16 @@ class ReaderBase {
8585 int32_t rowGroupIndex,
8686 const dwio::common::TypeWithId& type) const ;
8787
  /// @brief Converts the field names of a row type to lower case when
  /// fileColumnNamesReadAsLowerCase is true.
  /// @param rowTypePtr The input row type.
  /// @param fileColumnNamesReadAsLowerCase Whether to convert the names to
  /// lower case.
  /// @return The row type with converted names, or the input row type
  /// unchanged when fileColumnNamesReadAsLowerCase is false.
94+ static std::shared_ptr<const RowType> convertRowTypeNames (
95+ const RowTypePtr& rowTypePtr,
96+ bool fileColumnNamesReadAsLowerCase);
97+
8898 private:
8999 // Reads and parses file footer.
90100 void loadFileMetaData ();
@@ -547,22 +557,39 @@ TypePtr ReaderBase::convertType(
547557 }
548558}
549559
560+ std::shared_ptr<const RowType> ReaderBase::convertRowTypeNames (
561+ const RowTypePtr& rowTypePtr,
562+ bool fileColumnNamesReadAsLowerCase) {
563+ if (!fileColumnNamesReadAsLowerCase) {
564+ return rowTypePtr;
565+ }
566+ std::vector<std::string> names;
567+ names.reserve (rowTypePtr->names ().size ());
568+ std::vector<TypePtr> types = rowTypePtr->children ();
569+ for (const auto & name : rowTypePtr->names ()) {
570+ std::string childName = name;
571+ folly::toLowerAscii (childName);
572+ names.emplace_back (childName);
573+ }
574+ return TypeFactory<TypeKind::ROW>::create (std::move (names), std::move (types));
575+ }
576+
550577std::shared_ptr<const RowType> ReaderBase::createRowType (
551578 std::vector<std::shared_ptr<const ParquetTypeWithId::TypeWithId>> children,
552579 bool fileColumnNamesReadAsLowerCase) {
553580 std::vector<std::string> childNames;
581+ childNames.reserve (children.size ());
554582 std::vector<TypePtr> childTypes;
555- for (auto & child : children) {
556- auto childName =
557- std::static_pointer_cast<const ParquetTypeWithId>(child)->name_ ;
558- if (fileColumnNamesReadAsLowerCase) {
559- folly::toLowerAscii (childName);
560- }
561- childNames.push_back (std::move (childName));
562- childTypes.push_back (child->type ());
583+ childTypes.reserve (children.size ());
584+ for (const auto & child : children) {
585+ childNames.emplace_back (
586+ std::static_pointer_cast<const ParquetTypeWithId>(child)->name_ );
587+ childTypes.emplace_back (child->type ());
563588 }
564- return TypeFactory<TypeKind::ROW>::create (
565- std::move (childNames), std::move (childTypes));
589+ return convertRowTypeNames (
590+ TypeFactory<TypeKind::ROW>::create (
591+ std::move (childNames), std::move (childTypes)),
592+ fileColumnNamesReadAsLowerCase);
566593}
567594
568595void ReaderBase::scheduleRowGroups (
@@ -640,7 +667,11 @@ ParquetRowReader::ParquetRowReader(
640667 columnReader_ = ParquetColumnReader::build (
641668 readerBase_->schemaWithId (), // Id is schema id
642669 params,
643- *options_.getScanSpec ());
670+ *options_.getScanSpec (),
671+ ReaderBase::convertRowTypeNames (
672+ asRowType (options_.getSelector ()->getSchemaWithId ()->type ()),
673+ readerBase_->isFileColumnNamesReadAsLowerCase ()),
674+ pool_);
644675
645676 filterRowGroups ();
646677 if (!rowGroupIds_.empty ()) {
0 commit comments