Skip to content

Commit 0104e7e

Browse files
committed
chore: added new vector index benchmark
1 parent dc4d0b8 commit 0104e7e

File tree

1 file changed

+297
-0
lines changed

1 file changed

+297
-0
lines changed
Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
/*
2+
* Copyright © 2021-present Arcade Data Ltd (info@arcadedata.com)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-FileCopyrightText: 2021-present Arcade Data Ltd (info@arcadedata.com)
17+
* SPDX-License-Identifier: Apache-2.0
18+
*/
19+
package com.arcadedb.index.vector;
20+
21+
import com.arcadedb.GlobalConfiguration;
22+
import com.arcadedb.database.Database;
23+
import com.arcadedb.database.DatabaseFactory;
24+
import com.arcadedb.database.RID;
25+
import com.arcadedb.query.sql.executor.Result;
26+
import com.arcadedb.query.sql.executor.ResultSet;
27+
import com.arcadedb.schema.Type;
28+
import com.arcadedb.utility.FileUtils;
29+
import com.arcadedb.utility.Pair;
30+
import org.junit.jupiter.api.Tag;
31+
import org.junit.jupiter.api.Test;
32+
33+
import java.io.File;
34+
import java.util.*;
35+
36+
/**
37+
* Benchmark reproducing the tae898 benchmark scenario from GitHub discussion #3674.
38+
* Uses 384-dim vectors (matching all-MiniLM-L6-v2), k=50, ef_search=100.
39+
* Tests both the direct Java API and the SQL vectorNeighbors() path.
40+
* <p>
41+
* Configurations tested:
42+
* 1. Default (no quantization, no inline vectors) — matches what the benchmark uses
43+
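* 2. INT8 quantization — vectors are read from index pages instead of documents
44+
* Not run by default, but supported by runConfig(): storeVectorsInGraph=true (vectors in graph file for mmap'd access) and PRODUCT quantization with FusedPQ (approximate scoring during traversal)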
45+
* <p>
46+
* Usage:
47+
* mvn test -pl engine -Dtest=VectorSearchLatencyBenchmark -DfailIfNoTests=false \
48+
* -Dvector.bench.numVectors=500000
49+
*
50+
* @author Luca Garulli (l.garulli@arcadedata.com)
51+
*/
52+
@Tag("benchmark")
53+
class VectorSearchLatencyBenchmark {
54+
private static final String DB_PATH = "target/test-databases/VectorSearchLatencyBenchmark";
55+
56+
// Match tae898 benchmark: 384-dim (all-MiniLM-L6-v2), k=50, ef_search=100
57+
private static final int NUM_VECTORS = Integer.getInteger("vector.bench.numVectors", 500_000);
58+
private static final int DIMENSIONS = Integer.getInteger("vector.bench.dimensions", 384);
59+
private static final int NUM_QUERIES = Integer.getInteger("vector.bench.numQueries", 100);
60+
private static final int K = Integer.getInteger("vector.bench.k", 50);
61+
private static final int EF_SEARCH = Integer.getInteger("vector.bench.efSearch", 100);
62+
private static final int BATCH_SIZE = Integer.getInteger("vector.bench.batchSize", 10_000);
63+
private static final int WARMUP_QUERIES = 20;
64+
private static final long SEED = 42L;
65+
66+
@Test
67+
void runLatencyBenchmark() {
68+
System.out.println("=== VECTOR SEARCH LATENCY BENCHMARK (tae898 scenario) ===");
69+
System.out.printf("Vectors: %,d | Dimensions: %d | K: %d | efSearch: %d | Queries: %d%n",
70+
NUM_VECTORS, DIMENSIONS, K, EF_SEARCH, NUM_QUERIES);
71+
System.out.println();
72+
73+
final Random rng = new Random(SEED);
74+
75+
System.out.print("Generating clustered data vectors...");
76+
final float[][] dataVectors = generateClusteredVectors(NUM_VECTORS, DIMENSIONS, rng);
77+
final float[][] queryVectors = generateClusteredVectors(NUM_QUERIES, DIMENSIONS, rng);
78+
System.out.println(" done.");
79+
80+
// Config 1: Default (matches what the tae898 benchmark uses)
81+
System.out.println();
82+
System.out.println("=== Config 1: DEFAULT (no quantization, storeVectorsInGraph=false) ===");
83+
FileUtils.deleteRecursively(new File(DB_PATH));
84+
runConfig("DEFAULT", dataVectors, queryVectors, "NONE", false);
85+
86+
// Config 2: INT8 quantization (reads from index pages instead of documents)
87+
System.out.println();
88+
System.out.println("=== Config 2: INT8 quantization ===");
89+
FileUtils.deleteRecursively(new File(DB_PATH));
90+
runConfig("INT8", dataVectors, queryVectors, "INT8", false);
91+
}
92+
93+
private void runConfig(final String label, final float[][] dataVectors, final float[][] queryVectors,
94+
final String quantization, final boolean storeVectorsInGraph) {
95+
96+
GlobalConfiguration.PROFILE.setValue("high-performance");
97+
final Runtime rt = Runtime.getRuntime();
98+
rt.gc();
99+
final long memBefore = rt.totalMemory() - rt.freeMemory();
100+
101+
// Phase 1: Ingest
102+
System.out.printf("[%s] Phase 1: Ingesting %,d vectors...%n", label, NUM_VECTORS);
103+
final long ingestStart = System.nanoTime();
104+
105+
try (final DatabaseFactory factory = new DatabaseFactory(DB_PATH)) {
106+
try (final Database db = factory.create()) {
107+
db.transaction(() -> {
108+
final var type = db.getSchema().createDocumentType("VectorData");
109+
type.createProperty("id", Type.INTEGER);
110+
type.createProperty("vector", Type.ARRAY_OF_FLOATS);
111+
112+
final StringBuilder metadata = new StringBuilder();
113+
metadata.append(String.format("\"dimensions\": %d, \"similarity\": \"COSINE\"", DIMENSIONS));
114+
if (!"NONE".equals(quantization))
115+
metadata.append(String.format(", \"quantization\": \"%s\"", quantization));
116+
if (storeVectorsInGraph)
117+
metadata.append(", \"storeVectorsInGraph\": true");
118+
119+
db.command("sql", String.format(
120+
"CREATE INDEX ON VectorData (vector) LSM_VECTOR METADATA { %s }", metadata));
121+
});
122+
123+
int inserted = 0;
124+
db.begin();
125+
for (int i = 0; i < NUM_VECTORS; i++) {
126+
db.newDocument("VectorData").set("id", i).set("vector", dataVectors[i]).save();
127+
inserted++;
128+
if (inserted % BATCH_SIZE == 0) {
129+
db.commit();
130+
final double elapsed = (System.nanoTime() - ingestStart) / 1e9;
131+
System.out.printf("\r[%s] Inserted %,d / %,d (%.0f v/s, %.1fs) ",
132+
label, inserted, NUM_VECTORS, inserted / elapsed, elapsed);
133+
db.begin();
134+
}
135+
}
136+
if (db.isTransactionActive())
137+
db.commit();
138+
}
139+
140+
final double ingestSec = (System.nanoTime() - ingestStart) / 1e9;
141+
System.out.printf("%n[%s] Ingestion done: %.1fs (%,.0f vectors/sec)%n",
142+
label, ingestSec, NUM_VECTORS / ingestSec);
143+
144+
// Phase 2: Close + Reopen + Graph build (simulates database restart)
145+
System.out.printf("[%s] Phase 2: Reopen + graph build...%n", label);
146+
final long reopenStart = System.nanoTime();
147+
148+
try (final Database db = factory.open()) {
149+
final double reopenSec = (System.nanoTime() - reopenStart) / 1e9;
150+
System.out.printf("[%s] Reopen: %.2fs%n", label, reopenSec);
151+
152+
final LSMVectorIndex index = (LSMVectorIndex) db.getSchema().getType("VectorData")
153+
.getPolymorphicIndexByProperties("vector").getIndexesOnBuckets()[0];
154+
155+
// First query triggers graph build
156+
final long buildStart = System.nanoTime();
157+
index.findNeighborsFromVector(queryVectors[0], K, EF_SEARCH);
158+
final double buildSec = (System.nanoTime() - buildStart) / 1e9;
159+
160+
rt.gc();
161+
final long memAfter = rt.totalMemory() - rt.freeMemory();
162+
System.out.printf("[%s] Graph build (first query): %.1fs | Memory delta: %d MB%n",
163+
label, buildSec, (memAfter - memBefore) / (1024 * 1024));
164+
165+
// Phase 3: Direct API search
166+
System.out.printf("[%s] Phase 3: Direct API search (k=%d, efSearch=%d)...%n", label, K, EF_SEARCH);
167+
benchmarkDirectSearch(label, index, queryVectors);
168+
169+
// Phase 4: SQL vectorNeighbors() search (matches what the Python benchmark does)
170+
System.out.printf("[%s] Phase 4: SQL vectorNeighbors() search...%n", label);
171+
benchmarkSQLSearch(label, db, queryVectors);
172+
173+
// Phase 5: Print stats
174+
final Map<String, Long> stats = index.getStats();
175+
System.out.printf("[%s] Vector fetch stats: graph=%d doc=%d quantized=%d%n",
176+
label,
177+
stats.getOrDefault("vectorFetchFromGraph", 0L),
178+
stats.getOrDefault("vectorFetchFromDocuments", 0L),
179+
stats.getOrDefault("vectorFetchFromQuantized", 0L));
180+
System.out.printf("[%s] Search ops: %d, total search time: %d ms%n",
181+
label,
182+
stats.getOrDefault("searchOperations", 0L),
183+
stats.getOrDefault("totalSearchLatencyMs", 0L));
184+
}
185+
}
186+
}
187+
188+
private void benchmarkDirectSearch(final String label, final LSMVectorIndex index, final float[][] queryVectors) {
189+
// Warmup
190+
for (int i = 0; i < WARMUP_QUERIES; i++)
191+
index.findNeighborsFromVector(queryVectors[i % queryVectors.length], K, EF_SEARCH);
192+
193+
// Reset metrics after warmup
194+
index.getStats(); // just read current values
195+
196+
final long[] latenciesNs = new long[NUM_QUERIES];
197+
int totalResults = 0;
198+
199+
for (int q = 0; q < NUM_QUERIES; q++) {
200+
final long qStart = System.nanoTime();
201+
final List<Pair<RID, Float>> results = index.findNeighborsFromVector(queryVectors[q], K, EF_SEARCH);
202+
latenciesNs[q] = System.nanoTime() - qStart;
203+
totalResults += results.size();
204+
}
205+
206+
printLatencyStats(label + " API", latenciesNs, totalResults);
207+
}
208+
209+
private void benchmarkSQLSearch(final String label, final Database db, final float[][] queryVectors) {
210+
// Warmup
211+
for (int i = 0; i < WARMUP_QUERIES; i++) {
212+
final String vectorStr = vectorToString(queryVectors[i % queryVectors.length]);
213+
try (final ResultSet rs = db.query("sql",
214+
String.format("SELECT vectorNeighbors('VectorData[vector]', %s, %d, %d) as res", vectorStr, K, EF_SEARCH))) {
215+
while (rs.hasNext()) rs.next();
216+
}
217+
}
218+
219+
final long[] latenciesNs = new long[NUM_QUERIES];
220+
int totalResults = 0;
221+
222+
for (int q = 0; q < NUM_QUERIES; q++) {
223+
final String vectorStr = vectorToString(queryVectors[q]);
224+
final long qStart = System.nanoTime();
225+
try (final ResultSet rs = db.query("sql",
226+
String.format("SELECT vectorNeighbors('VectorData[vector]', %s, %d, %d) as res", vectorStr, K, EF_SEARCH))) {
227+
while (rs.hasNext()) {
228+
final Result row = rs.next();
229+
final Object res = row.getProperty("res");
230+
if (res instanceof List<?> list)
231+
totalResults += list.size();
232+
}
233+
}
234+
latenciesNs[q] = System.nanoTime() - qStart;
235+
}
236+
237+
printLatencyStats(label + " SQL", latenciesNs, totalResults);
238+
}
239+
240+
private void printLatencyStats(final String label, final long[] latenciesNs, final int totalResults) {
241+
Arrays.sort(latenciesNs);
242+
final double meanMs = Arrays.stream(latenciesNs).average().orElse(0) / 1e6;
243+
final double p50Ms = latenciesNs[latenciesNs.length / 2] / 1e6;
244+
final double p95Ms = latenciesNs[(int) (latenciesNs.length * 0.95)] / 1e6;
245+
final double p99Ms = latenciesNs[(int) (latenciesNs.length * 0.99)] / 1e6;
246+
final double minMs = latenciesNs[0] / 1e6;
247+
final double maxMs = latenciesNs[latenciesNs.length - 1] / 1e6;
248+
final double avgResultsPerQuery = (double) totalResults / latenciesNs.length;
249+
250+
System.out.printf("[%s] Latency: mean=%.2fms p50=%.2fms p95=%.2fms p99=%.2fms min=%.2fms max=%.2fms%n",
251+
label, meanMs, p50Ms, p95Ms, p99Ms, minMs, maxMs);
252+
System.out.printf("[%s] Avg results/query: %.1f%n", label, avgResultsPerQuery);
253+
}
254+
255+
private String vectorToString(final float[] vector) {
256+
final StringBuilder sb = new StringBuilder("[");
257+
for (int i = 0; i < vector.length; i++) {
258+
if (i > 0)
259+
sb.append(',');
260+
sb.append(vector[i]);
261+
}
262+
sb.append(']');
263+
return sb.toString();
264+
}
265+
266+
private float[][] generateClusteredVectors(final int count, final int dims, final Random rng) {
267+
// Generate clustered vectors that mimic real-world embeddings
268+
final int numClusters = Math.max(50, count / 500);
269+
final float clusterSpread = 0.15f;
270+
271+
final float[][] centroids = new float[numClusters][dims];
272+
for (int c = 0; c < numClusters; c++) {
273+
float norm = 0;
274+
for (int d = 0; d < dims; d++) {
275+
centroids[c][d] = (float) rng.nextGaussian();
276+
norm += centroids[c][d] * centroids[c][d];
277+
}
278+
norm = (float) Math.sqrt(norm);
279+
for (int d = 0; d < dims; d++)
280+
centroids[c][d] /= norm;
281+
}
282+
283+
final float[][] vectors = new float[count][dims];
284+
for (int i = 0; i < count; i++) {
285+
final float[] centroid = centroids[i % numClusters];
286+
float norm = 0;
287+
for (int d = 0; d < dims; d++) {
288+
vectors[i][d] = centroid[d] + (float) (rng.nextGaussian() * clusterSpread);
289+
norm += vectors[i][d] * vectors[i][d];
290+
}
291+
norm = (float) Math.sqrt(norm);
292+
for (int d = 0; d < dims; d++)
293+
vectors[i][d] /= norm;
294+
}
295+
return vectors;
296+
}
297+
}

0 commit comments

Comments
 (0)