1919#include " velox/vector/ConstantVector.h"
2020#include " velox/vector/DecodedVector.h"
2121#include " velox/vector/DictionaryVector.h"
22+ #include " velox/vector/FlatMapVector.h"
2223#include " velox/vector/FlatVector.h"
2324
2425namespace facebook ::nimble {
@@ -348,18 +349,80 @@ uint64_t getRawSizeFromArrayVector(
348349 return rawSize;
349350}
350351
352+ namespace {
353+
351354uint64_t getRawSizeFromMapVector (
355+ const velox::MapVector& mapVector,
356+ const velox::common::Ranges& childRanges,
357+ RawSizeContext& context) {
358+ uint64_t rawSize = 0 ;
359+ rawSize += getRawSizeFromVector (mapVector.mapKeys (), childRanges, context);
360+ rawSize += getRawSizeFromVector (mapVector.mapValues (), childRanges, context);
361+ return rawSize;
362+ }
363+
364+ // For flat map vectors, we need to merge the base ranges with the "valid"
365+ // ranges for each key. Valid ranges for a key are controled by `inMap` buffers,
366+ // which dictate in which rows a particular key is present/active.
367+ uint64_t getRawSizeFromFlatMapVector (
368+ const velox::FlatMapVector& flatMapVector,
369+ const velox::common::Ranges& baseRanges,
370+ RawSizeContext& context) {
371+ uint64_t rawSize = 0 ;
372+
373+ if (baseRanges.size ()) {
374+ velox::common::Ranges keyRanges;
375+
376+ for (size_t i = 0 ; i < flatMapVector.numDistinctKeys (); ++i) {
377+ keyRanges.clear ();
378+
379+ const uint32_t keySize = getRawSizeFromVector (
380+ flatMapVector.distinctKeys (),
381+ velox::common::Ranges::of (i, i + 1 ),
382+ context);
383+
384+ // Process the keys and values for the rows where the key is present.
385+ if (auto & inMaps = flatMapVector.inMapsAt (i)) {
386+ const auto * rawInMaps = inMaps->as <uint64_t >();
387+ for (const auto & row : baseRanges) {
388+ if (velox::bits::isBitSet (rawInMaps, row)) {
389+ keyRanges.add (row, row + 1 );
390+ }
391+ }
392+
393+ rawSize += getRawSizeFromVector (
394+ flatMapVector.mapValuesAt (i), keyRanges, context);
395+ rawSize += keySize * keyRanges.size ();
396+ }
397+ // If there is no inMap buffer, process all rows.
398+ else {
399+ rawSize += getRawSizeFromVector (
400+ flatMapVector.mapValuesAt (i), baseRanges, context);
401+ rawSize += keySize * baseRanges.size ();
402+ }
403+ }
404+ }
405+ return rawSize;
406+ }
407+
408+ } // namespace
409+
410+ uint64_t getRawSizeFromMap (
352411 const velox::VectorPtr& vector,
353412 const velox::common::Ranges& ranges,
354413 RawSizeContext& context) {
355414 VELOX_CHECK_NOT_NULL (vector);
356415 const auto & encoding = vector->encoding ();
357416 const velox::MapVector* mapVector;
417+
358418 const velox::vector_size_t * offsets;
359419 const velox::vector_size_t * sizes;
360420 velox::common::Ranges childRanges;
421+
422+ uint64_t rawSize = 0 ;
361423 uint64_t nullCount = 0 ;
362- auto processRow = [&](size_t row) {
424+
425+ auto processMapRow = [&](size_t row) {
363426 auto begin = offsets[row];
364427 auto end = begin + sizes[row];
365428 // Ensure valid size
@@ -371,6 +434,7 @@ uint64_t getRawSizeFromMapVector(
371434 };
372435
373436 switch (encoding) {
437+ // Handle top-level (regular) Map vectors.
374438 case velox::VectorEncoding::Simple::MAP: {
375439 mapVector = vector->as <velox::MapVector>();
376440 VELOX_CHECK_NOT_NULL (
@@ -388,20 +452,50 @@ uint64_t getRawSizeFromMapVector(
388452 if (velox::bits::isBitNull (nulls, row)) {
389453 ++nullCount;
390454 } else {
391- processRow (row);
455+ processMapRow (row);
392456 }
393457 }
394458 } else {
395459 for (const auto & row : ranges) {
396- processRow (row);
460+ processMapRow (row);
397461 }
398462 }
463+ rawSize += getRawSizeFromMapVector (*mapVector, childRanges, context);
464+ break ;
465+ }
399466
467+ // Handle top-level Flat Map vectors.
468+ case velox::VectorEncoding::Simple::FLAT_MAP: {
469+ auto flatMapVector = vector->as <velox::FlatMapVector>();
470+ VELOX_CHECK_NOT_NULL (
471+ flatMapVector,
472+ " Encoding mismatch on FlatMapVector. Encoding: {}. TypeKind: {}." ,
473+ encoding,
474+ vector->typeKind ());
475+
476+ if (flatMapVector->mayHaveNulls ()) {
477+ const uint64_t * nulls = flatMapVector->rawNulls ();
478+ for (const auto & row : ranges) {
479+ if (velox::bits::isBitNull (nulls, row)) {
480+ ++nullCount;
481+ } else {
482+ childRanges.add (row, row + 1 );
483+ }
484+ }
485+ rawSize +=
486+ getRawSizeFromFlatMapVector (*flatMapVector, childRanges, context);
487+ } else {
488+ rawSize += getRawSizeFromFlatMapVector (*flatMapVector, ranges, context);
489+ }
400490 break ;
401491 }
492+
493+ // Cases when maps or flat maps are wrapped by a constant.
402494 case velox::VectorEncoding::Simple::CONSTANT: {
403495 return getRawSizeFromConstantComplexVector (vector, ranges, context);
404496 }
497+
498+ // Cases when maps or flat maps are wrapped by a dictionary.
405499 case velox::VectorEncoding::Simple::DICTIONARY: {
406500 const auto * dictionaryMapVector =
407501 vector->as <velox::DictionaryVector<velox::ComplexType>>();
@@ -416,49 +510,79 @@ uint64_t getRawSizeFromMapVector(
416510 velox::DecodedVector& decodedVector = localDecodedVector.get ();
417511 decodedVector.decode (*dictionaryMapVector);
418512
419- mapVector = decodedVector.base ()->as <velox::MapVector>();
420- VELOX_CHECK_NOT_NULL (
421- mapVector,
422- " Encoding mismatch on FlatVector. MapVector: {}. TypeKind: {}." ,
423- decodedVector.base ()->encoding (),
424- decodedVector.base ()->typeKind ());
425-
426- offsets = mapVector->rawOffsets ();
427- sizes = mapVector->rawSizes ();
428-
429- if (decodedVector.mayHaveNulls ()) {
430- for (const auto & row : ranges) {
431- if (decodedVector.isNullAt (row)) {
432- ++nullCount;
513+ // Now switch on the inner type of the dictionary; must be either a map
514+ // or a flat map.
515+ switch (decodedVector.base ()->encoding ()) {
516+ // Dictionary wrapped around a map:
517+ case velox::VectorEncoding::Simple::MAP: {
518+ mapVector = decodedVector.base ()->as <velox::MapVector>();
519+ VELOX_CHECK_NOT_NULL (
520+ mapVector,
521+ " Encoding mismatch on FlatVector. MapVector: {}. TypeKind: {}." ,
522+ decodedVector.base ()->encoding (),
523+ decodedVector.base ()->typeKind ());
524+
525+ offsets = mapVector->rawOffsets ();
526+ sizes = mapVector->rawSizes ();
527+
528+ if (decodedVector.mayHaveNulls ()) {
529+ for (const auto & row : ranges) {
530+ if (decodedVector.isNullAt (row)) {
531+ ++nullCount;
532+ } else {
533+ processMapRow (decodedVector.index (row));
534+ }
535+ }
433536 } else {
434- processRow (decodedVector.index (row));
537+ for (const auto & row : ranges) {
538+ processMapRow (decodedVector.index (row));
539+ }
435540 }
541+ rawSize += getRawSizeFromMapVector (*mapVector, childRanges, context);
542+ break ;
436543 }
437- } else {
438- for (const auto & row : ranges) {
439- processRow (decodedVector.index (row));
544+ // Dictionary wrapped around a flat map:
545+ case velox::VectorEncoding::Simple::FLAT_MAP: {
546+ auto flatMapVector = decodedVector.base ()->as <velox::FlatMapVector>();
547+ VELOX_CHECK_NOT_NULL (
548+ flatMapVector,
549+ " Encoding mismatch on FlatMapVector. Encoding: {}. TypeKind: {}." ,
550+ decodedVector.base ()->encoding (),
551+ decodedVector.base ()->typeKind ());
552+
553+ if (decodedVector.mayHaveNulls ()) {
554+ for (const auto & row : ranges) {
555+ if (decodedVector.isNullAt (row)) {
556+ ++nullCount;
557+ } else {
558+ auto idx = decodedVector.index (row);
559+ childRanges.add (idx, idx + 1 );
560+ }
561+ }
562+ } else {
563+ for (const auto & row : ranges) {
564+ auto idx = decodedVector.index (row);
565+ childRanges.add (idx, idx + 1 );
566+ }
567+ }
568+ rawSize +=
569+ getRawSizeFromFlatMapVector (*flatMapVector, childRanges, context);
570+ break ;
440571 }
572+ default :
573+ VELOX_FAIL (
574+ " Unsupported map encoding wrapped by DICTIONARY: {}." , encoding);
441575 }
442-
443576 break ;
444577 }
445- default : {
446- VELOX_FAIL (" Unsupported encoding: {}." , encoding);
447- }
448- }
449-
450- uint64_t rawSize = 0 ;
451- if (childRanges.size ()) {
452- rawSize += getRawSizeFromVector (mapVector->mapKeys (), childRanges, context);
453- rawSize +=
454- getRawSizeFromVector (mapVector->mapValues (), childRanges, context);
578+ default :
579+ VELOX_FAIL (" Unsupported map encoding: {}." , encoding);
455580 }
456581
457582 context.nullCount = nullCount;
458583 if (nullCount) {
459584 rawSize += nullCount * NULL_SIZE;
460585 }
461-
462586 return rawSize;
463587}
464588
@@ -494,10 +618,8 @@ uint64_t getRawSizeFromRowVector(
494618 }
495619 }
496620 } else {
497- // Potentially expensive?
498621 childRangesPtr = &ranges;
499622 }
500-
501623 break ;
502624 }
503625 case velox::VectorEncoding::Simple::CONSTANT: {
@@ -624,7 +746,7 @@ uint64_t getRawSizeFromVector(
624746 return getRawSizeFromArrayVector (vector, ranges, context);
625747 }
626748 case velox::TypeKind::MAP: {
627- return getRawSizeFromMapVector (vector, ranges, context);
749+ return getRawSizeFromMap (vector, ranges, context);
628750 }
629751 case velox::TypeKind::ROW: {
630752 return getRawSizeFromRowVector (vector, ranges, context);
0 commit comments