@@ -60,8 +60,7 @@ use crate::arrow::converter::{
60
60
DecimalConverter , FixedLenBinaryConverter , FixedSizeArrayConverter ,
61
61
Int96ArrayConverter , Int96Converter , IntervalDayTimeArrayConverter ,
62
62
IntervalDayTimeConverter , IntervalYearMonthArrayConverter ,
63
- IntervalYearMonthConverter , LargeBinaryArrayConverter , LargeBinaryConverter ,
64
- LargeUtf8ArrayConverter , LargeUtf8Converter ,
63
+ IntervalYearMonthConverter , Utf8ArrayConverter , Utf8Converter ,
65
64
} ;
66
65
use crate :: arrow:: record_reader:: buffer:: { ScalarValue , ValuesBuffer } ;
67
66
use crate :: arrow:: record_reader:: { GenericRecordReader , RecordReader } ;
@@ -81,6 +80,11 @@ use crate::schema::types::{
81
80
} ;
82
81
use crate :: schema:: visitor:: TypeVisitor ;
83
82
83
+ mod byte_array;
84
+ mod offset_buffer;
85
+
86
+ pub use byte_array:: make_byte_array_reader;
87
+
84
88
/// Array reader reads parquet data into arrow array.
85
89
pub trait ArrayReader {
86
90
fn as_any ( & self ) -> & dyn Any ;
@@ -1778,57 +1782,43 @@ impl<'a> ArrayReaderBuilder {
1778
1782
null_mask_only,
1779
1783
) ?,
1780
1784
) ) ,
1781
- PhysicalType :: BYTE_ARRAY => {
1782
- if cur_type. get_basic_info ( ) . converted_type ( ) == ConvertedType :: UTF8 {
1783
- if let Some ( ArrowType :: LargeUtf8 ) = arrow_type {
1784
- let converter =
1785
- LargeUtf8Converter :: new ( LargeUtf8ArrayConverter { } ) ;
1786
- Ok ( Box :: new ( ComplexObjectArrayReader :: <
1787
- ByteArrayType ,
1788
- LargeUtf8Converter ,
1789
- > :: new (
1790
- page_iterator,
1791
- column_desc,
1792
- converter,
1793
- arrow_type,
1794
- ) ?) )
1795
- } else {
1796
- use crate :: arrow:: arrow_array_reader:: {
1797
- ArrowArrayReader , StringArrayConverter ,
1798
- } ;
1799
- let converter = StringArrayConverter :: new ( ) ;
1800
- Ok ( Box :: new ( ArrowArrayReader :: try_new (
1801
- * page_iterator,
1802
- column_desc,
1803
- converter,
1804
- arrow_type,
1805
- ) ?) )
1785
+ PhysicalType :: BYTE_ARRAY => match arrow_type {
1786
+ // TODO: Replace with optimised dictionary reader (#171)
1787
+ Some ( ArrowType :: Dictionary ( _, _) ) => {
1788
+ match cur_type. get_basic_info ( ) . converted_type ( ) {
1789
+ ConvertedType :: UTF8 => {
1790
+ let converter = Utf8Converter :: new ( Utf8ArrayConverter { } ) ;
1791
+ Ok ( Box :: new ( ComplexObjectArrayReader :: <
1792
+ ByteArrayType ,
1793
+ Utf8Converter ,
1794
+ > :: new (
1795
+ page_iterator,
1796
+ column_desc,
1797
+ converter,
1798
+ arrow_type,
1799
+ ) ?) )
1800
+ }
1801
+ _ => {
1802
+ let converter = BinaryConverter :: new ( BinaryArrayConverter { } ) ;
1803
+ Ok ( Box :: new ( ComplexObjectArrayReader :: <
1804
+ ByteArrayType ,
1805
+ BinaryConverter ,
1806
+ > :: new (
1807
+ page_iterator,
1808
+ column_desc,
1809
+ converter,
1810
+ arrow_type,
1811
+ ) ?) )
1812
+ }
1806
1813
}
1807
- } else if let Some ( ArrowType :: LargeBinary ) = arrow_type {
1808
- let converter =
1809
- LargeBinaryConverter :: new ( LargeBinaryArrayConverter { } ) ;
1810
- Ok ( Box :: new ( ComplexObjectArrayReader :: <
1811
- ByteArrayType ,
1812
- LargeBinaryConverter ,
1813
- > :: new (
1814
- page_iterator,
1815
- column_desc,
1816
- converter,
1817
- arrow_type,
1818
- ) ?) )
1819
- } else {
1820
- let converter = BinaryConverter :: new ( BinaryArrayConverter { } ) ;
1821
- Ok ( Box :: new ( ComplexObjectArrayReader :: <
1822
- ByteArrayType ,
1823
- BinaryConverter ,
1824
- > :: new (
1825
- page_iterator,
1826
- column_desc,
1827
- converter,
1828
- arrow_type,
1829
- ) ?) )
1830
1814
}
1831
- }
1815
+ _ => make_byte_array_reader (
1816
+ page_iterator,
1817
+ column_desc,
1818
+ arrow_type,
1819
+ null_mask_only,
1820
+ ) ,
1821
+ } ,
1832
1822
PhysicalType :: FIXED_LEN_BYTE_ARRAY
1833
1823
if cur_type. get_basic_info ( ) . converted_type ( )
1834
1824
== ConvertedType :: DECIMAL =>
0 commit comments