15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
+ use arrow:: array:: Array ;
19
+ use arrow:: datatypes:: DataType ;
18
20
use criterion:: { criterion_group, criterion_main, Criterion } ;
19
21
use parquet:: util:: { DataPageBuilder , DataPageBuilderImpl , InMemoryPageIterator } ;
20
22
use parquet:: {
@@ -24,6 +26,7 @@ use parquet::{
24
26
data_type:: { ByteArrayType , Int32Type } ,
25
27
schema:: types:: { ColumnDescPtr , SchemaDescPtr } ,
26
28
} ;
29
+ use rand:: { rngs:: StdRng , Rng , SeedableRng } ;
27
30
use std:: { collections:: VecDeque , sync:: Arc } ;
28
31
29
32
fn build_test_schema ( ) -> SchemaDescPtr {
@@ -47,9 +50,6 @@ const PAGES_PER_GROUP: usize = 2;
47
50
const VALUES_PER_PAGE : usize = 10_000 ;
48
51
const BATCH_SIZE : usize = 8192 ;
49
52
50
- use arrow:: array:: Array ;
51
- use rand:: { rngs:: StdRng , Rng , SeedableRng } ;
52
-
53
53
pub fn seedable_rng ( ) -> StdRng {
54
54
StdRng :: seed_from_u64 ( 42 )
55
55
}
@@ -311,6 +311,46 @@ fn create_string_byte_array_reader(
311
311
make_byte_array_reader ( Box :: new ( page_iterator) , column_desc, None , true ) . unwrap ( )
312
312
}
313
313
314
+ fn create_string_byte_array_dictionary_reader (
315
+ page_iterator : impl PageIterator + ' static ,
316
+ column_desc : ColumnDescPtr ,
317
+ ) -> Box < dyn ArrayReader > {
318
+ use parquet:: arrow:: array_reader:: make_byte_array_dictionary_reader;
319
+ let arrow_type =
320
+ DataType :: Dictionary ( Box :: new ( DataType :: Int32 ) , Box :: new ( DataType :: Utf8 ) ) ;
321
+
322
+ make_byte_array_dictionary_reader (
323
+ Box :: new ( page_iterator) ,
324
+ column_desc,
325
+ Some ( arrow_type) ,
326
+ true ,
327
+ )
328
+ . unwrap ( )
329
+ }
330
+
331
+ fn create_complex_object_byte_array_dictionary_reader (
332
+ page_iterator : impl PageIterator + ' static ,
333
+ column_desc : ColumnDescPtr ,
334
+ ) -> Box < dyn ArrayReader > {
335
+ use parquet:: arrow:: array_reader:: {
336
+ make_byte_array_dictionary_reader, ComplexObjectArrayReader ,
337
+ } ;
338
+ use parquet:: arrow:: converter:: { Utf8ArrayConverter , Utf8Converter } ;
339
+ let arrow_type =
340
+ DataType :: Dictionary ( Box :: new ( DataType :: Int32 ) , Box :: new ( DataType :: Utf8 ) ) ;
341
+
342
+ let converter = Utf8Converter :: new ( Utf8ArrayConverter { } ) ;
343
+ Box :: new (
344
+ ComplexObjectArrayReader :: < ByteArrayType , Utf8Converter > :: new (
345
+ Box :: new ( page_iterator) ,
346
+ column_desc,
347
+ converter,
348
+ Some ( arrow_type) ,
349
+ )
350
+ . unwrap ( ) ,
351
+ )
352
+ }
353
+
314
354
fn add_benches ( c : & mut Criterion ) {
315
355
const EXPECTED_VALUE_COUNT : usize =
316
356
NUM_ROW_GROUPS * PAGES_PER_GROUP * VALUES_PER_PAGE ;
@@ -322,10 +362,7 @@ fn add_benches(c: &mut Criterion) {
322
362
let mandatory_int32_column_desc = schema. column ( 0 ) ;
323
363
let optional_int32_column_desc = schema. column ( 1 ) ;
324
364
let mandatory_string_column_desc = schema. column ( 2 ) ;
325
- // println!("mandatory_string_column_desc: {:?}", mandatory_string_column_desc);
326
365
let optional_string_column_desc = schema. column ( 3 ) ;
327
- // println!("optional_string_column_desc: {:?}", optional_string_column_desc);
328
-
329
366
// primitive / int32 benchmarks
330
367
// =============================
331
368
@@ -541,7 +578,7 @@ fn add_benches(c: &mut Criterion) {
541
578
542
579
// string, dictionary encoded, half NULLs
543
580
let dictionary_string_half_null_data = build_dictionary_encoded_string_page_iterator (
544
- schema,
581
+ schema. clone ( ) ,
545
582
optional_string_column_desc. clone ( ) ,
546
583
0.5 ,
547
584
) ;
@@ -559,6 +596,90 @@ fn add_benches(c: &mut Criterion) {
559
596
} ,
560
597
) ;
561
598
599
+ group. bench_function (
600
+ "read StringDictionary, dictionary encoded, mandatory, no NULLs - old" ,
601
+ |b| {
602
+ b. iter ( || {
603
+ let array_reader = create_complex_object_byte_array_dictionary_reader (
604
+ dictionary_string_no_null_data. clone ( ) ,
605
+ mandatory_string_column_desc. clone ( ) ,
606
+ ) ;
607
+ count = bench_array_reader ( array_reader) ;
608
+ } ) ;
609
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
610
+ } ,
611
+ ) ;
612
+
613
+ group. bench_function (
614
+ "read StringDictionary, dictionary encoded, mandatory, no NULLs - new" ,
615
+ |b| {
616
+ b. iter ( || {
617
+ let array_reader = create_string_byte_array_dictionary_reader (
618
+ dictionary_string_no_null_data. clone ( ) ,
619
+ mandatory_string_column_desc. clone ( ) ,
620
+ ) ;
621
+ count = bench_array_reader ( array_reader) ;
622
+ } ) ;
623
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
624
+ } ,
625
+ ) ;
626
+
627
+ group. bench_function (
628
+ "read StringDictionary, dictionary encoded, optional, no NULLs - old" ,
629
+ |b| {
630
+ b. iter ( || {
631
+ let array_reader = create_complex_object_byte_array_dictionary_reader (
632
+ dictionary_string_no_null_data. clone ( ) ,
633
+ optional_string_column_desc. clone ( ) ,
634
+ ) ;
635
+ count = bench_array_reader ( array_reader) ;
636
+ } ) ;
637
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
638
+ } ,
639
+ ) ;
640
+
641
+ group. bench_function (
642
+ "read StringDictionary, dictionary encoded, optional, no NULLs - new" ,
643
+ |b| {
644
+ b. iter ( || {
645
+ let array_reader = create_string_byte_array_dictionary_reader (
646
+ dictionary_string_no_null_data. clone ( ) ,
647
+ optional_string_column_desc. clone ( ) ,
648
+ ) ;
649
+ count = bench_array_reader ( array_reader) ;
650
+ } ) ;
651
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
652
+ } ,
653
+ ) ;
654
+
655
+ group. bench_function (
656
+ "read StringDictionary, dictionary encoded, optional, half NULLs - old" ,
657
+ |b| {
658
+ b. iter ( || {
659
+ let array_reader = create_complex_object_byte_array_dictionary_reader (
660
+ dictionary_string_half_null_data. clone ( ) ,
661
+ optional_string_column_desc. clone ( ) ,
662
+ ) ;
663
+ count = bench_array_reader ( array_reader) ;
664
+ } ) ;
665
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
666
+ } ,
667
+ ) ;
668
+
669
+ group. bench_function (
670
+ "read StringDictionary, dictionary encoded, optional, half NULLs - new" ,
671
+ |b| {
672
+ b. iter ( || {
673
+ let array_reader = create_string_byte_array_dictionary_reader (
674
+ dictionary_string_half_null_data. clone ( ) ,
675
+ optional_string_column_desc. clone ( ) ,
676
+ ) ;
677
+ count = bench_array_reader ( array_reader) ;
678
+ } ) ;
679
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
680
+ } ,
681
+ ) ;
682
+
562
683
group. finish ( ) ;
563
684
}
564
685
0 commit comments