Skip to content

Commit 2888657

Browse files
author
Brian Hulette
committed
Add dictionary vector unit tests
1 parent b0a0c08 commit 2888657

File tree

1 file changed

+55
-6
lines changed

1 file changed

+55
-6
lines changed

js/test/unit/vector-tests.ts

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717

1818
import { TextEncoder } from 'text-encoding-utf-8';
1919
import Arrow from '../Arrow';
20-
import { type, TypedArray, TypedArrayConstructor } from '../../src/Arrow';
20+
import { type, TypedArray, TypedArrayConstructor, Vector } from '../../src/Arrow';
21+
import { packBools } from '../../src/util/bit'
2122

2223
const utf8Encoder = new TextEncoder('utf-8');
2324

24-
const { BoolData, FlatData, FlatListData } = Arrow.data;
25-
const { IntVector, FloatVector, BoolVector, Utf8Vector } = Arrow.vector;
25+
const { BoolData, FlatData, FlatListData, DictionaryData } = Arrow.data;
26+
const { IntVector, FloatVector, BoolVector, Utf8Vector, DictionaryVector } = Arrow.vector;
2627
const {
27-
Utf8, Bool,
28+
Dictionary, Utf8, Bool,
2829
Float16, Float32, Float64,
2930
Int8, Int16, Int32, Int64,
3031
Uint8, Uint16, Uint32, Uint64,
@@ -310,6 +311,54 @@ describe(`Utf8Vector`, () => {
310311
let offset = 0;
311312
const offsets = Uint32Array.of(0, ...values.map((d) => { offset += d.length; return offset; }));
312313
const vector = new Utf8Vector(new FlatListData(new Utf8(), n, null, offsets, utf8Encoder.encode(values.join(''))));
314+
basicVectorTests(vector, values, ['abc', '123']);
315+
describe(`sliced`, () => {
316+
basicVectorTests(vector.slice(1,3), values.slice(1,3), ['foo', 'abc']);
317+
});
318+
});
319+
320+
// Exercises DictionaryVector on top of a Utf8 dictionary and randomly generated
// Int32 indices, once with no nulls and once with a randomly generated validity mask.
// NOTE(review): indices and validity come from Math.random(), so each run uses
// different data; a failure may not reproduce on the next run.
describe(`DictionaryVector`, () => {
    const dictionary = ['foo', 'bar', 'baz'];
    const extras = ['abc', '123']; // values to search for that should NOT be found

    // Cumulative string lengths of the dictionary entries. Every entry is ASCII,
    // so these match the offsets into the UTF-8 encoded, concatenated buffer.
    let offset = 0;
    const offsets = Uint32Array.of(0, ...dictionary.map((entry) => offset += entry.length));
    const dictionaryVector = new Utf8Vector(new FlatListData(new Utf8(), dictionary.length, null, offsets, utf8Encoder.encode(dictionary.join(''))));

    // 50 random positions into `dictionary` (range derived from the dictionary size).
    const indices = Array.from({ length: 50 }, () => Math.floor(Math.random() * dictionary.length));

    describe(`index with nullCount == 0`, () => {
        const indicesData = new FlatData(new Int32(), indices.length, new Uint8Array(0), indices);

        // Expected decoded values: each index resolved through the dictionary.
        const values = indices.map((index) => dictionary[index]);
        const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionaryVector.type, indicesData.type), dictionaryVector, indicesData));

        basicVectorTests(vector, values, extras);

        describe(`sliced`, () => {
            basicVectorTests(vector.slice(10, 20), values.slice(10, 20), extras);
        });
    });

    describe(`index with nullCount > 0`, () => {
        // Roughly 80% of the slots are marked valid; the rest are expected to read as null.
        const validity = Array.from({ length: indices.length }, () => Math.random() > 0.2);
        const nullCount = validity.filter((isValid) => !isValid).length;
        const indicesData = new FlatData(new Int32(), indices.length, packBools(validity), indices, 0, nullCount);

        // Expected decoded values: dictionary entry where valid, null otherwise.
        const values = indices.map((index, i) => validity[i] ? dictionary[index] : null);
        const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionaryVector.type, indicesData.type), dictionaryVector, indicesData));

        basicVectorTests(vector, values, extras);

        describe(`sliced`, () => {
            basicVectorTests(vector.slice(10, 20), values.slice(10, 20), extras);
        });
    });
});
354+
355+
// Creates some basic tests for the given vector.
356+
// Verifies that:
357+
// - `get` and the native iterator return the same data as `values`
358+
// - `indexOf` returns the same index as `values.indexOf` for every entry of `values` and `extras`
359+
function basicVectorTests(vector: Vector, values: any[], extras: any[]) {
360+
const n = values.length;
361+
313362
test(`gets expected values`, () => {
314363
let i = -1;
315364
while (++i < n) {
@@ -325,14 +374,14 @@ describe(`Utf8Vector`, () => {
325374
}
326375
});
327376
test(`indexOf returns expected values`, () => {
328-
let testValues = values.concat(['abc', '12345']);
377+
let testValues = values.concat(extras);
329378

330379
for (const value of testValues) {
331380
const expected = values.indexOf(value);
332381
expect(vector.indexOf(value)).toEqual(expected);
333382
}
334383
});
335-
});
384+
}
336385

337386
function toMap<T>(entries: Record<string, T>, keys: string[]) {
338387
return keys.reduce((map, key) => {

0 commit comments

Comments
 (0)