Skip to content

Commit 4f3957b

Browse files
authored
fix: add a hive partitioned setting (#295)
Not likely to work with `httpfs` hrefs due to CORS: https://duckdb.org/docs/stable/clients/wasm/extensions#httpfs
1 parent cbdb3a1 commit 4f3957b

File tree

9 files changed

+146
-25
lines changed

9 files changed

+146
-25
lines changed

src/app.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ export default function App() {
5555
const connection = await db.connect();
5656
await connection.query("LOAD spatial;");
5757
await connection.query("LOAD icu;");
58+
await connection.query("LOAD httpfs;");
5859
setConnection(connection);
5960
})();
6061
}

src/components/panel/stac-geoparquet.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ export function StacGeoparquetHrefConnectionPanel({
2929
connection: AsyncDuckDBConnection;
3030
}) {
3131
const setValue = useStore((store) => store.setValue);
32-
const result = useStacGeoparquet({ href, connection });
32+
const hivePartitioning = useStore((store) => store.hivePartitioning);
33+
const result = useStacGeoparquet({ href, connection, hivePartitioning });
3334
useEffect(() => {
3435
if (result.data) setValue(result.data);
3536
}, [result.data, setValue]);

src/components/ui/settings.tsx

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ export const SettingsButton = React.forwardRef<
2424
const setRestrictToThreeBandCogs = useStore(
2525
(store) => store.setRestrictToThreeBandCogs
2626
);
27+
const hivePartitioning = useStore((store) => store.hivePartitioning);
28+
const setHivePartitioning = useStore((store) => store.setHivePartitioning);
2729

2830
return (
2931
<Dialog.Root>
@@ -59,6 +61,24 @@ export const SettingsButton = React.forwardRef<
5961
</Text>
6062
</Field.HelperText>
6163
</Field.Root>
64+
<Field.Root mt={4}>
65+
<Switch.Root
66+
checked={hivePartitioning}
67+
onCheckedChange={(e) => setHivePartitioning(e.checked)}
68+
>
69+
<Switch.HiddenInput />
70+
<Switch.Control>
71+
<Switch.Thumb />
72+
</Switch.Control>
73+
<Switch.Label>Hive partitioning</Switch.Label>
74+
</Switch.Root>
75+
<Field.HelperText>
76+
<Text fontSize="sm" color="fg.muted">
77+
When enabled, DuckDB will interpret path segments as
78+
partitions when reading stac-geoparquet files.
79+
</Text>
80+
</Field.HelperText>
81+
</Field.Root>
6282
</Dialog.Body>
6383
<Dialog.CloseTrigger asChild>
6484
<CloseButton size="sm" />

src/components/value/stac-geoparquet-href.tsx

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,16 @@ interface Props {
1616

1717
export default function StacGeoparquetHref({ href, connection }: Props) {
1818
const datetimeFilter = useStore((store) => store.datetimeFilter);
19+
const hivePartitioning = useStore((store) => store.hivePartitioning);
1920
const setStacGeoparquetTable = useStore(
2021
(store) => store.setStacGeoparquetTable
2122
);
22-
const result = useStacGeoparquetTable({ href, connection, datetimeFilter });
23+
const result = useStacGeoparquetTable({
24+
href,
25+
connection,
26+
datetimeFilter,
27+
hivePartitioning,
28+
});
2329

2430
useEffect(() => {
2531
if (result.data?.geometryType && result.data.table)
@@ -41,7 +47,12 @@ export default function StacGeoparquetHref({ href, connection }: Props) {
4147
}
4248

4349
function StacGeoparquetFilter({ href, connection }: Props) {
44-
const result = useStacGeoparquetDatetimeBounds({ href, connection });
50+
const hivePartitioning = useStore((store) => store.hivePartitioning);
51+
const result = useStacGeoparquetDatetimeBounds({
52+
href,
53+
connection,
54+
hivePartitioning,
55+
});
4556
if (result.error)
4657
return (
4758
<ErrorAlert

src/components/value/stac-geoparquet-item-id.tsx

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,13 @@ export default function StacGeoparquetItemId({
1414
connection: AsyncDuckDBConnection;
1515
}) {
1616
const setPickedItem = useStore((store) => store.setPickedItem);
17-
const result = useStacGeoparquetItem({ id, href, connection });
17+
const hivePartitioning = useStore((store) => store.hivePartitioning);
18+
const result = useStacGeoparquetItem({
19+
id,
20+
href,
21+
connection,
22+
hivePartitioning,
23+
});
1824

1925
useEffect(() => {
2026
setPickedItem(result.data);

src/hooks/stac.ts

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,29 +38,37 @@ export function useStacJsonFromFile({ file }: { file: File }) {
3838
export function useStacGeoparquet({
3939
href,
4040
connection,
41+
hivePartitioning,
4142
}: {
4243
href: string;
4344
connection: AsyncDuckDBConnection;
45+
hivePartitioning: boolean;
4446
}) {
4547
return useQuery({
46-
queryKey: ["stac-geoparquet", href],
48+
queryKey: ["stac-geoparquet", href, hivePartitioning],
4749
queryFn: async () => {
48-
return await fetchStacGeoparquet({ href, connection });
50+
return await fetchStacGeoparquet({ href, connection, hivePartitioning });
4951
},
5052
});
5153
}
5254

5355
export function useStacGeoparquetDatetimeBounds({
5456
href,
5557
connection,
58+
hivePartitioning,
5659
}: {
5760
href: string;
5861
connection: AsyncDuckDBConnection;
62+
hivePartitioning: boolean;
5963
}) {
6064
return useQuery({
61-
queryKey: ["stac-geoparquet-datetime-bounds", href],
65+
queryKey: ["stac-geoparquet-datetime-bounds", href, hivePartitioning],
6266
queryFn: async () => {
63-
return await fetchStacGeoparquetDatetimeBounds({ href, connection });
67+
return await fetchStacGeoparquetDatetimeBounds({
68+
href,
69+
connection,
70+
hivePartitioning,
71+
});
6472
},
6573
});
6674
}
@@ -69,18 +77,21 @@ export function useStacGeoparquetTable({
6977
href,
7078
connection,
7179
datetimeFilter,
80+
hivePartitioning,
7281
}: {
7382
href: string;
7483
connection: AsyncDuckDBConnection;
7584
datetimeFilter: DatetimeFilter | null;
85+
hivePartitioning: boolean;
7686
}) {
7787
return useQuery({
78-
queryKey: ["stac-geoparquet-table", href, datetimeFilter],
88+
queryKey: ["stac-geoparquet-table", href, datetimeFilter, hivePartitioning],
7989
queryFn: async () => {
8090
return await fetchStacGeoparquetTable({
8191
href,
8292
connection,
8393
datetimeFilter,
94+
hivePartitioning,
8495
});
8596
},
8697
placeholderData: (previousData) => previousData,
@@ -91,15 +102,22 @@ export function useStacGeoparquetItem({
91102
id,
92103
href,
93104
connection,
105+
hivePartitioning,
94106
}: {
95107
id: string;
96108
href: string;
97109
connection: AsyncDuckDBConnection;
110+
hivePartitioning: boolean;
98111
}) {
99112
return useQuery({
100-
queryKey: ["stac-geoparquet-item", id, href],
113+
queryKey: ["stac-geoparquet-item", id, href, hivePartitioning],
101114
queryFn: async () => {
102-
return await fetchStacGeoparquetItem({ id, href, connection });
115+
return await fetchStacGeoparquetItem({
116+
id,
117+
href,
118+
connection,
119+
hivePartitioning,
120+
});
103121
},
104122
});
105123
}

src/store/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ export const useStore = create<State>()(
7070
name: "stac-map-settings",
7171
partialize: (state) => ({
7272
restrictToThreeBandCogs: state.restrictToThreeBandCogs,
73+
hivePartitioning: state.hivePartitioning,
7374
}),
7475
}
7576
)

src/store/settings.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import type { State } from ".";
44
export interface SettingsState {
55
restrictToThreeBandCogs: boolean;
66
setRestrictToThreeBandCogs: (restrict: boolean) => void;
7+
hivePartitioning: boolean;
8+
setHivePartitioning: (hivePartitioning: boolean) => void;
79
}
810

911
export const createSettingsSlice: StateCreator<State, [], [], SettingsState> = (
@@ -12,4 +14,6 @@ export const createSettingsSlice: StateCreator<State, [], [], SettingsState> = (
1214
restrictToThreeBandCogs: true,
1315
setRestrictToThreeBandCogs: (restrict) =>
1416
set({ restrictToThreeBandCogs: restrict }),
17+
hivePartitioning: true,
18+
setHivePartitioning: (hivePartitioning) => set({ hivePartitioning }),
1519
});

src/utils/stac-geoparquet.ts

Lines changed: 73 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,46 @@ import type { DatetimeFilter } from "../store/datetime";
1313
import type { StacItemCollection } from "../types/stac";
1414

1515
export const SUPPORTED_GEOMETRY_TYPES = ["point", "polygon"] as const;
16+
17+
export async function executeDuckdbQuery({
18+
connection,
19+
select,
20+
href,
21+
where,
22+
hivePartitioning,
23+
}: {
24+
connection: AsyncDuckDBConnection;
25+
select: string;
26+
href: string;
27+
where?: string;
28+
hivePartitioning: boolean;
29+
}) {
30+
let query = `SELECT ${select} FROM read_parquet('${href}', hive_partitioning = ${hivePartitioning})`;
31+
if (where) {
32+
query += ` WHERE ${where}`;
33+
}
34+
console.log(query);
35+
36+
return await connection.query(query);
37+
}
1638
export type SupportedGeometryType = (typeof SUPPORTED_GEOMETRY_TYPES)[number];
1739

1840
export async function fetchStacGeoparquet({
1941
href,
2042
connection,
43+
hivePartitioning,
2144
}: {
2245
href: string;
2346
connection: AsyncDuckDBConnection;
47+
hivePartitioning: boolean;
2448
}): Promise<StacItemCollection> {
25-
const query = `SELECT COUNT(*) as count, MIN(bbox.xmin) as xmin, MIN(bbox.ymin) as ymin, MAX(bbox.xmax) as xmax, MAX(bbox.ymax) as ymax FROM read_parquet('${href}')`;
26-
const result = await connection.query(query);
49+
const result = await executeDuckdbQuery({
50+
connection,
51+
href,
52+
hivePartitioning,
53+
select:
54+
"COUNT(*) as count, MIN(bbox.xmin) as xmin, MIN(bbox.ymin) as ymin, MAX(bbox.xmax) as xmax, MAX(bbox.ymax) as ymax",
55+
});
2756
const row = result.toArray().map((row) => row.toJSON())[0];
2857
return {
2958
type: "FeatureCollection",
@@ -43,15 +72,25 @@ export async function fetchStacGeoparquet({
4372
export async function fetchStacGeoparquetDatetimeBounds({
4473
href,
4574
connection,
75+
hivePartitioning,
4676
}: {
4777
href: string;
4878
connection: AsyncDuckDBConnection;
79+
hivePartitioning: boolean;
4980
}): Promise<{ start: Date; end: Date } | null> {
5081
const { startDatetimeColumnName, endDatetimeColumnName } =
51-
await fetchStacGeoparquetDatetimeColumns(href, connection);
82+
await fetchStacGeoparquetDatetimeColumns(
83+
href,
84+
connection,
85+
hivePartitioning
86+
);
5287
if (!startDatetimeColumnName || !endDatetimeColumnName) return null;
53-
const query = `SELECT MIN(${startDatetimeColumnName}) as start, MAX(${endDatetimeColumnName}) as end FROM read_parquet('${href}')`;
54-
const result = await connection.query(query);
88+
const result = await executeDuckdbQuery({
89+
connection,
90+
href,
91+
hivePartitioning,
92+
select: `MIN(${startDatetimeColumnName}) as start, MAX(${endDatetimeColumnName}) as end`,
93+
});
5594
const row = result.toArray().map((row) => row.toJSON())[0];
5695
return {
5796
start: new Date(row.start),
@@ -63,20 +102,33 @@ export async function fetchStacGeoparquetTable({
63102
href,
64103
connection,
65104
datetimeFilter,
105+
hivePartitioning,
66106
}: {
67107
href: string;
68108
connection: AsyncDuckDBConnection;
69109
datetimeFilter: DatetimeFilter | null;
110+
hivePartitioning: boolean;
70111
}) {
71-
let query = `SELECT ST_AsWKB(geometry) AS geometry, ST_GeometryType(geometry) AS geometry_type, id FROM read_parquet('${href}')`;
112+
let where: string | undefined;
72113
if (datetimeFilter) {
73114
const { startDatetimeColumnName, endDatetimeColumnName } =
74-
await fetchStacGeoparquetDatetimeColumns(href, connection);
115+
await fetchStacGeoparquetDatetimeColumns(
116+
href,
117+
connection,
118+
hivePartitioning
119+
);
75120
if (!startDatetimeColumnName || !endDatetimeColumnName) return null;
76121
const { start, end } = datetimeFilter;
77-
query += ` WHERE ${startDatetimeColumnName} >= '${start.toISOString()}' AND ${endDatetimeColumnName} <= '${end.toISOString()}'`;
122+
where = `${startDatetimeColumnName} >= '${start.toISOString()}' AND ${endDatetimeColumnName} <= '${end.toISOString()}'`;
78123
}
79-
const result = await connection.query(query);
124+
const result = await executeDuckdbQuery({
125+
connection,
126+
href,
127+
hivePartitioning,
128+
select:
129+
"ST_AsWKB(geometry) AS geometry, ST_GeometryType(geometry) AS geometry_type, id",
130+
where,
131+
});
80132
const geometry: Uint8Array[] = result.getChildAt(0)?.toArray();
81133
const geometryType = result.getChildAt(1)?.toArray()[0]?.toLowerCase() as
82134
| string
@@ -136,25 +188,32 @@ export async function fetchStacGeoparquetItem({
136188
id,
137189
href,
138190
connection,
191+
hivePartitioning,
139192
}: {
140193
id: string;
141194
href: string;
142195
connection: AsyncDuckDBConnection;
196+
hivePartitioning: boolean;
143197
}) {
144-
const result = await connection.query(
145-
`SELECT * REPLACE ST_AsGeoJSON(geometry) as geometry FROM read_parquet('${href}') WHERE id = '${id}'`
146-
);
198+
const result = await executeDuckdbQuery({
199+
connection,
200+
href,
201+
hivePartitioning,
202+
select: "* REPLACE ST_AsGeoJSON(geometry) as geometry",
203+
where: `id = '${id}'`,
204+
});
147205
const item = stacWasm.arrowToStacJson(result)[0];
148206
item.geometry = JSON.parse(item.geometry);
149207
return item;
150208
}
151209

152210
async function fetchStacGeoparquetDatetimeColumns(
153211
href: string,
154-
connection: AsyncDuckDBConnection
212+
connection: AsyncDuckDBConnection,
213+
hivePartitioning: boolean
155214
) {
156215
const describeResult = await connection.query(
157-
`DESCRIBE SELECT * FROM read_parquet('${href}')`
216+
`DESCRIBE SELECT * FROM read_parquet('${href}', hive_partitioning = ${hivePartitioning})`
158217
);
159218
const describe = describeResult.toArray().map((row) => row.toJSON());
160219
const columnNames = describe.map((row) => row.column_name);

0 commit comments

Comments
 (0)