17
17
18
18
import { TextEncoder } from 'text-encoding-utf-8' ;
19
19
import Arrow from '../Arrow' ;
20
- import { type , TypedArray , TypedArrayConstructor } from '../../src/Arrow' ;
20
+ import { type , TypedArray , TypedArrayConstructor , Vector } from '../../src/Arrow' ;
21
+ import { packBools } from '../../src/util/bit'
21
22
22
23
const utf8Encoder = new TextEncoder ( 'utf-8' ) ;
23
24
24
- const { BoolData, FlatData, FlatListData } = Arrow . data ;
25
- const { IntVector, FloatVector, BoolVector, Utf8Vector } = Arrow . vector ;
25
+ const { BoolData, FlatData, FlatListData, DictionaryData } = Arrow . data ;
26
+ const { IntVector, FloatVector, BoolVector, Utf8Vector, DictionaryVector } = Arrow . vector ;
26
27
const {
27
- Utf8, Bool,
28
+ Dictionary , Utf8, Bool,
28
29
Float16, Float32, Float64,
29
30
Int8, Int16, Int32, Int64,
30
31
Uint8, Uint16, Uint32, Uint64,
@@ -310,6 +311,54 @@ describe(`Utf8Vector`, () => {
310
311
let offset = 0 ;
311
312
const offsets = Uint32Array . of ( 0 , ...values . map ( ( d ) => { offset += d . length ; return offset ; } ) ) ;
312
313
const vector = new Utf8Vector ( new FlatListData ( new Utf8 ( ) , n , null , offsets , utf8Encoder . encode ( values . join ( '' ) ) ) ) ;
314
+ basicVectorTests ( vector , values , [ 'abc' , '123' ] ) ;
315
+ describe ( `sliced` , ( ) => {
316
+ basicVectorTests ( vector . slice ( 1 , 3 ) , values . slice ( 1 , 3 ) , [ 'foo' , 'abc' ] ) ;
317
+ } ) ;
318
+ } ) ;
319
+
320
// Exercises DictionaryVector over a small Utf8 dictionary, both with a fully
// valid index buffer and with an index buffer that contains nulls. Every
// vector (and a slice of it) is run through the shared `basicVectorTests`.
describe(`DictionaryVector`, () => {
    // Entries the index buffer refers to by position.
    const dictionary = ['foo', 'bar', 'baz'];
    // Values to search for that should NOT be found in any vector below.
    const extras = ['abc', '123'];
    // Window used for the `sliced` variants of each case.
    const sliceBegin = 10;
    const sliceEnd = 20;

    // Running total of the entry lengths: offsets[i] and offsets[i + 1] bracket
    // entry i inside the concatenated, encoded value buffer (entries are ASCII,
    // so string length and encoded byte length agree).
    let offset = 0;
    const offsets = Uint32Array.of(0, ...dictionary.map((d) => { offset += d.length; return offset; }));
    const dictionaryVec = new Utf8Vector(new FlatListData(new Utf8(), dictionary.length, null, offsets, utf8Encoder.encode(dictionary.join(''))));

    // 50 random positions into `dictionary`, shared by both cases.
    const indices = Array.from({ length: 50 }, () => Math.random() * dictionary.length | 0);

    describe(`index with nullCount == 0`, () => {
        // An empty bitmap is passed here; no null count is supplied.
        const indicesData = new FlatData(new Int32(), indices.length, new Uint8Array(0), indices);

        const values = indices.map((d) => dictionary[d]);
        const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionaryVec.type, indicesData.type), dictionaryVec, indicesData));

        basicVectorTests(vector, values, extras);

        describe(`sliced`, () => {
            basicVectorTests(vector.slice(sliceBegin, sliceEnd), values.slice(sliceBegin, sliceEnd), extras);
        });
    });

    describe(`index with nullCount > 0`, () => {
        // Roughly 20% of the slots are randomly marked invalid. One slot inside
        // the sliced window is always invalid so that both the full vector and
        // the slice are guaranteed to contain at least one null.
        const forcedNullIndex = sliceBegin + ((sliceEnd - sliceBegin) >> 1);
        const validity = Array.from({ length: indices.length }, (_, i) => i !== forcedNullIndex && Math.random() > 0.2);
        const nullCount = validity.filter((valid) => !valid).length;
        // NOTE(review): the trailing arguments are presumably (offset, nullCount) — confirm against FlatData.
        const indicesData = new FlatData(new Int32(), indices.length, packBools(validity), indices, 0, nullCount);

        // Expected values: the dictionary entry where valid, null otherwise.
        const values = indices.map((d, i) => validity[i] ? dictionary[d] : null);
        const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionaryVec.type, indicesData.type), dictionaryVec, indicesData));

        basicVectorTests(vector, values, extras);

        describe(`sliced`, () => {
            basicVectorTests(vector.slice(sliceBegin, sliceEnd), values.slice(sliceBegin, sliceEnd), extras);
        });
    });
});
354
+
355
+ // Creates some basic tests for the given vector.
356
+ // Verifies that:
357
+ // - `get` and the native iterator return the same data as `values`
358
+ // - `indexOf` returns the same indices as `values`
359
+ function basicVectorTests ( vector : Vector , values : any [ ] , extras : any [ ] ) {
360
+ const n = values . length ;
361
+
313
362
test ( `gets expected values` , ( ) => {
314
363
let i = - 1 ;
315
364
while ( ++ i < n ) {
@@ -325,14 +374,14 @@ describe(`Utf8Vector`, () => {
325
374
}
326
375
} ) ;
327
376
test ( `indexOf returns expected values` , ( ) => {
328
- let testValues = values . concat ( [ 'abc' , '12345' ] ) ;
377
+ let testValues = values . concat ( extras ) ;
329
378
330
379
for ( const value of testValues ) {
331
380
const expected = values . indexOf ( value ) ;
332
381
expect ( vector . indexOf ( value ) ) . toEqual ( expected ) ;
333
382
}
334
383
} ) ;
335
- } ) ;
384
+ }
336
385
337
386
function toMap < T > ( entries : Record < string , T > , keys : string [ ] ) {
338
387
return keys . reduce ( ( map , key ) => {
0 commit comments