Skip to content

feat: add support for json register #47

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions benchmarking/benchmarking-app/src/app/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/**
 * Multi-CTE query against `taxi.parquet`: groups by `hvfhs_license_num`,
 * left-joins the grouping onto a full-table CTE and counts the result.
 * The literal (including its leading and trailing newline) is kept verbatim.
 */
const PARQUET_CTE_JOIN_QUERY = `
WITH group_by_query AS (
SELECT
hvfhs_license_num,
COUNT(*)
FROM
taxi.parquet
GROUP BY
hvfhs_license_num
),

full_query AS (
SELECT
*
FROM
taxi.parquet
)

SELECT
COUNT(*)
FROM
group_by_query
LEFT JOIN
full_query
ON
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
LIMIT 1
`;

/** Queries that read from the `taxi.parquet` file. */
const PARQUET_QUERIES = [
  'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet',
  "SELECT * FROM taxi.parquet WHERE originating_base_num='B03404' LIMIT 100",
  'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet GROUP BY hvfhs_license_num',
  'SELECT * as total_count FROM taxi.parquet ORDER BY bcf LIMIT 100',
  PARQUET_CTE_JOIN_QUERY,
];

/** Queries that read from the `taxijson` table. */
const JSON_QUERIES = [
  'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxijson',
  'SELECT * FROM taxijson WHERE price >= 1.0005812645 LIMIT 100',
  'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxijson GROUP BY order_count',
  'SELECT * as total_count FROM taxijson ORDER BY seconds_in_bucket LIMIT 100',
];

/**
 * Ordered list of SQL strings exported for the benchmarking app:
 * the parquet-backed queries first, followed by the JSON-backed ones.
 */
export const TEST_QUERIES = [...PARQUET_QUERIES, ...JSON_QUERIES];
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ export const MemoryDBMProvider = ({ children }: { children: JSX.Element }) => {
fetchTableFileBuffers: async (table) => {
return [];
},
logger: log,
onEvent: (event) => {
console.info(event);
},
});
log.setLevel('DEBUG');
const dbm = new DBM({
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import axios from 'axios';
import { useState } from 'react';
import TAXI_JSON_DATA from '../../assets/data-sets/taxi.json';
import { useDBM } from '../hooks/dbm-context';
import { useClassicEffect } from '../hooks/use-classic-effect';

Expand All @@ -21,6 +22,12 @@ export const FileLoader = ({ children }: { children: JSX.Element }) => {
buffer: fileBufferView,
});

await fileManager.registerJSON({
json: TAXI_JSON_DATA,
tableName: 'taxijson',
fileName: 'taxi.json',
});

setIsFileLoader(true);
})();
}, []);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { useState } from 'react';
import { TEST_QUERIES } from '../constants';
import { useDBM } from '../hooks/dbm-context';
import { useClassicEffect } from '../hooks/use-classic-effect';

Expand All @@ -14,49 +15,15 @@ export const QueryBenchmarking = () => {

useClassicEffect(() => {
setTotalTime(0);
const testQueries = [
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet',
"SELECT * FROM taxi.parquet WHERE originating_base_num='B03404' LIMIT 100",
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet GROUP BY hvfhs_license_num',
'SELECT * as total_count FROM taxi.parquet ORDER BY bcf LIMIT 100',
`
WITH group_by_query AS (
SELECT
hvfhs_license_num,
COUNT(*)
FROM
taxi.parquet
GROUP BY
hvfhs_license_num
),

full_query AS (
SELECT
*
FROM
taxi.parquet
)

SELECT
COUNT(*)
FROM
group_by_query
LEFT JOIN
full_query
ON
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
LIMIT 1
`,
];

setOutput([]);
const promiseArr = [];
const start = performance.now();
for (let i = 0; i < testQueries.length; i++) {
for (let i = 0; i < TEST_QUERIES.length; i++) {
const eachQueryStart = performance.now();

const promiseObj = dbm
.queryWithTableNames(testQueries[i], ['taxi'])
.queryWithTableNames(TEST_QUERIES[i], ['taxi'])
.then((results) => {
const end = performance.now();
const time = end - eachQueryStart;
Expand Down

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion benchmarking/benchmarking-app/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
"allowJs": false,
"esModuleInterop": false,
"allowSyntheticDefaultImports": true,
"strict": true
"strict": true,
"resolveJsonModule": true,
},
"files": [],
"include": [],
Expand Down
2 changes: 1 addition & 1 deletion meerkat-dbm/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@devrev/meerkat-dbm",
"version": "0.0.136",
"version": "0.0.14",
"dependencies": {
"tslib": "^2.3.0",
"@duckdb/duckdb-wasm": "^1.28.0",
Expand Down
10 changes: 10 additions & 0 deletions meerkat-dbm/src/dbm/dbm.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { AsyncDuckDB } from '@duckdb/duckdb-wasm';
import log from 'loglevel';
import {
FileBufferStore,
FileJsonStore,
FileManagerType,
} from '../file-manager/file-manager-type';
import { FileData, Table, TableWiseFiles } from '../types';
Expand All @@ -28,6 +29,15 @@ export class MockFileManager implements FileManagerType {
this.tables[prop.tableName].files.push(prop);
}

/**
 * Mock implementation of JSON registration.
 * Discards the `json` payload and registers the remaining file metadata
 * through `registerFileBuffer` with an empty byte buffer.
 *
 * @param prop - The JSON file descriptor to register.
 * @returns A promise that settles once the underlying buffer registration has settled.
 */
async registerJSON(prop: FileJsonStore): Promise<void> {
  // `json` is pulled out only so that it is excluded from `fileData`.
  const { json, ...fileData } = prop;

  // Await the registration so that a failure rejects this method's promise
  // instead of surfacing as an unhandled (floating) promise rejection.
  await this.registerFileBuffer({
    ...fileData,
    buffer: new Uint8Array(),
  });
}

async getFileBuffer(name: string): Promise<Uint8Array> {
const fileBuffer = this.fileBufferStore[name];
if (!fileBuffer) {
Expand Down
3 changes: 1 addition & 2 deletions meerkat-dbm/src/dbm/dbm.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { AsyncDuckDBConnection } from '@duckdb/duckdb-wasm';
import { FileManagerType } from '../file-manager/file-manager-type';
import { DBMEvent } from '../logger/event-types';
import { DBMLogger } from '../logger/logger-types';
import { DBMEvent, DBMLogger } from '../logger';
import { InstanceManagerType } from './instance-manager';

import { DBMConstructorOptions, QueryOptions, QueryQueueItem } from './types';
Expand Down
3 changes: 1 addition & 2 deletions meerkat-dbm/src/dbm/types.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { FileManagerType } from '../file-manager/file-manager-type';
import { DBMEvent } from '../logger/event-types';
import { DBMLogger } from '../logger/logger-types';
import { DBMEvent, DBMLogger } from '../logger';
import { TableWiseFiles } from '../types';
import { InstanceManagerType } from './instance-manager';

Expand Down
66 changes: 52 additions & 14 deletions meerkat-dbm/src/file-manager/file-manager-type.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,41 @@
import { InstanceManagerType } from '../dbm/instance-manager';
import { DBMEvent, DBMLogger } from '../logger';
import { Table, TableWiseFiles } from '../types';

/**
* @description
* Options accepted when constructing a File Manager.
*/
export interface FileManagerConstructorOptions {
/**
* @description
* It manages the lifecycle of the DuckDB database instance.
* It provides methods for obtaining an initialized DuckDB instance and terminating the instance.
*/
instanceManager: InstanceManagerType;

/**
* @description
* Represents a logger instance, which will be used for logging messages throughout the File Manager's execution.
*/
logger?: DBMLogger;

/**
* @description
* A callback function that handles events emitted by the File Manager.
*/
onEvent?: (event: DBMEvent) => void;

/**
* @description
* Configuration options for the File Manager.
*/
options?: {
/**
* Maximum size of the file in DB in bytes
*/
maxFileSize?: number;
};

/**
* @description
* Async callback that receives a table name and resolves to the list of file buffers for that table.
* NOTE(review): when and how often the File Manager invokes this is not visible here — confirm against the implementation.
*/
fetchTableFileBuffers: (tableName: string) => Promise<FileBufferStore[]>;
}

export interface FileManagerType {
/**
* @description
Expand All @@ -16,6 +51,15 @@ export interface FileManagerType {
*/
registerFileBuffer: (props: FileBufferStore) => Promise<void>;

/**
* @description
* Registers a single JSON file in the file manager.
* It converts a JSON object to a Uint8Array by writing it to a Parquet file in a DuckDB database and registers it.
* Also emits an event with the time taken for the conversion.
* @param props - The FileJsonStore object to register.
*/
registerJSON: (props: FileJsonStore) => Promise<void>;

/**
* @description
* Retrieves the file buffer associated with a given file name.
Expand Down Expand Up @@ -73,24 +117,18 @@ export interface FileManagerType {
onDBShutdownHandler: () => Promise<void>;
}


export interface FileBufferStore {
export type BaseFileStore = {
tableName: string;
fileName: string;
buffer: Uint8Array;
staleTime?: number;
cacheTime?: number;
metadata?: object;
}
};

export interface FileManagerConstructorOptions {
fetchTableFileBuffers: (tableName: string) => Promise<FileBufferStore[]>;
instanceManager: InstanceManagerType;
options?: {
/**
* Maximum size of the file in DB in bytes
*/
maxFileSize?: number;
};
}
/**
* @description
* A file entry whose contents are supplied as raw bytes:
* all fields of BaseFileStore plus the `buffer` holding the file data.
*/
export type FileBufferStore = BaseFileStore & {
buffer: Uint8Array;
};

/**
* @description
* A file entry whose contents are supplied as a JSON object:
* all fields of BaseFileStore plus the `json` payload.
* This is the argument type of `registerJSON`.
*/
export type FileJsonStore = BaseFileStore & {
json: object;
};
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,19 @@ import { InstanceManagerType } from '../../../dbm/instance-manager';
import { FILE_TYPES } from '../../../types';
import { IndexedDBFileManager } from '../indexed-db-file-manager';
import { MeerkatDatabase } from '../meerkat-database';
import log = require('loglevel');

const mockDB = {
registerFileBuffer: async (fileName: string, buffer: Uint8Array) => {
return new Promise((resolve) => {
setTimeout(() => {
resolve([fileName]);
}, 200);
});
},
unregisterFileBuffer: async (fileName: string) => {
return new Promise((resolve) => {
setTimeout(() => {
resolve([fileName]);
}, 200);
});
registerFileBuffer: jest.fn(),
registerFileText: jest.fn(),
copyFileToBuffer: jest.fn(),
registerEmptyFileBuffer: jest.fn(),
connect: async () => {
return {
query: jest.fn(),
insertJSONFromPath: jest.fn(),
close: jest.fn(),
};
},
};

Expand Down Expand Up @@ -71,6 +69,10 @@ describe('IndexedDBFileManager', () => {
return [];
},
instanceManager,
logger: log,
onEvent: (event) => {
console.log(event);
},
});

await fileManager.initializeDB();
Expand Down Expand Up @@ -212,6 +214,24 @@ describe('IndexedDBFileManager', () => {

expect(tableData[0].metadata).toEqual({ test: 'test' });
});

// Verifies that registering a JSON payload records both the table entry and
// the file entry in IndexedDB under the names supplied by the caller.
it('should register JSON data', async () => {
  const fileJson = {
    tableName: 'taxi-json',
    fileName: 'taxi-json.parquet',
    json: {
      test: 'test',
    },
  };

  await fileManager.registerJSON(fileJson);

  const tableData = await indexedDB.tablesKey.toArray();
  const fileBufferData = await indexedDB.files.toArray();

  // The results of `.some(...)` must be asserted; evaluating them without
  // `expect` lets the test pass even when nothing was stored.
  expect(
    tableData.some((table) => table.tableName === fileJson.tableName)
  ).toBe(true);
  expect(
    fileBufferData.some((file) => file.fileName === fileJson.fileName)
  ).toBe(true);
});
});


Loading