|
| 1 | +/* |
| 2 | + * Copyright © 2021-present Arcade Data Ltd (info@arcadedata.com) |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + * |
| 16 | + * SPDX-FileCopyrightText: 2021-present Arcade Data Ltd (info@arcadedata.com) |
| 17 | + * SPDX-License-Identifier: Apache-2.0 |
| 18 | + */ |
| 19 | +package com.arcadedb.index.vector; |
| 20 | + |
| 21 | +import com.arcadedb.GlobalConfiguration; |
| 22 | +import com.arcadedb.database.Database; |
| 23 | +import com.arcadedb.database.DatabaseFactory; |
| 24 | +import com.arcadedb.database.RID; |
| 25 | +import com.arcadedb.query.sql.executor.Result; |
| 26 | +import com.arcadedb.query.sql.executor.ResultSet; |
| 27 | +import com.arcadedb.schema.Type; |
| 28 | +import com.arcadedb.utility.FileUtils; |
| 29 | +import com.arcadedb.utility.Pair; |
| 30 | +import org.junit.jupiter.api.Tag; |
| 31 | +import org.junit.jupiter.api.Test; |
| 32 | + |
| 33 | +import java.io.File; |
| 34 | +import java.util.*; |
| 35 | + |
| 36 | +/** |
| 37 | + * Benchmark reproducing the tae898 benchmark scenario from GitHub discussion #3674. |
| 38 | + * Uses 384-dim vectors (matching all-MiniLM-L6-v2), k=50, ef_search=100. |
| 39 | + * Tests both the direct Java API and the SQL vectorNeighbors() path. |
| 40 | + * <p> |
| 41 | + * Configurations tested: |
| 42 | + * 1. Default (no quantization, no inline vectors) — matches what the benchmark uses |
| 43 | + * 2. storeVectorsInGraph=true — vectors in graph file for mmap'd access |
| 44 | + * 3. PRODUCT quantization with FusedPQ — approximate scoring during traversal |
| 45 | + * <p> |
| 46 | + * Usage: |
| 47 | + * mvn test -pl engine -Dtest=VectorSearchLatencyBenchmark -DfailIfNoTests=false \ |
| 48 | + * -Dvector.bench.numVectors=500000 |
| 49 | + * |
| 50 | + * @author Luca Garulli (l.garulli@arcadedata.com) |
| 51 | + */ |
| 52 | +@Tag("benchmark") |
| 53 | +class VectorSearchLatencyBenchmark { |
| 54 | + private static final String DB_PATH = "target/test-databases/VectorSearchLatencyBenchmark"; |
| 55 | + |
| 56 | + // Match tae898 benchmark: 384-dim (all-MiniLM-L6-v2), k=50, ef_search=100 |
| 57 | + private static final int NUM_VECTORS = Integer.getInteger("vector.bench.numVectors", 500_000); |
| 58 | + private static final int DIMENSIONS = Integer.getInteger("vector.bench.dimensions", 384); |
| 59 | + private static final int NUM_QUERIES = Integer.getInteger("vector.bench.numQueries", 100); |
| 60 | + private static final int K = Integer.getInteger("vector.bench.k", 50); |
| 61 | + private static final int EF_SEARCH = Integer.getInteger("vector.bench.efSearch", 100); |
| 62 | + private static final int BATCH_SIZE = Integer.getInteger("vector.bench.batchSize", 10_000); |
| 63 | + private static final int WARMUP_QUERIES = 20; |
| 64 | + private static final long SEED = 42L; |
| 65 | + |
| 66 | + @Test |
| 67 | + void runLatencyBenchmark() { |
| 68 | + System.out.println("=== VECTOR SEARCH LATENCY BENCHMARK (tae898 scenario) ==="); |
| 69 | + System.out.printf("Vectors: %,d | Dimensions: %d | K: %d | efSearch: %d | Queries: %d%n", |
| 70 | + NUM_VECTORS, DIMENSIONS, K, EF_SEARCH, NUM_QUERIES); |
| 71 | + System.out.println(); |
| 72 | + |
| 73 | + final Random rng = new Random(SEED); |
| 74 | + |
| 75 | + System.out.print("Generating clustered data vectors..."); |
| 76 | + final float[][] dataVectors = generateClusteredVectors(NUM_VECTORS, DIMENSIONS, rng); |
| 77 | + final float[][] queryVectors = generateClusteredVectors(NUM_QUERIES, DIMENSIONS, rng); |
| 78 | + System.out.println(" done."); |
| 79 | + |
| 80 | + // Config 1: Default (matches what the tae898 benchmark uses) |
| 81 | + System.out.println(); |
| 82 | + System.out.println("=== Config 1: DEFAULT (no quantization, storeVectorsInGraph=false) ==="); |
| 83 | + FileUtils.deleteRecursively(new File(DB_PATH)); |
| 84 | + runConfig("DEFAULT", dataVectors, queryVectors, "NONE", false); |
| 85 | + |
| 86 | + // Config 2: INT8 quantization (reads from index pages instead of documents) |
| 87 | + System.out.println(); |
| 88 | + System.out.println("=== Config 2: INT8 quantization ==="); |
| 89 | + FileUtils.deleteRecursively(new File(DB_PATH)); |
| 90 | + runConfig("INT8", dataVectors, queryVectors, "INT8", false); |
| 91 | + } |
| 92 | + |
| 93 | + private void runConfig(final String label, final float[][] dataVectors, final float[][] queryVectors, |
| 94 | + final String quantization, final boolean storeVectorsInGraph) { |
| 95 | + |
| 96 | + GlobalConfiguration.PROFILE.setValue("high-performance"); |
| 97 | + final Runtime rt = Runtime.getRuntime(); |
| 98 | + rt.gc(); |
| 99 | + final long memBefore = rt.totalMemory() - rt.freeMemory(); |
| 100 | + |
| 101 | + // Phase 1: Ingest |
| 102 | + System.out.printf("[%s] Phase 1: Ingesting %,d vectors...%n", label, NUM_VECTORS); |
| 103 | + final long ingestStart = System.nanoTime(); |
| 104 | + |
| 105 | + try (final DatabaseFactory factory = new DatabaseFactory(DB_PATH)) { |
| 106 | + try (final Database db = factory.create()) { |
| 107 | + db.transaction(() -> { |
| 108 | + final var type = db.getSchema().createDocumentType("VectorData"); |
| 109 | + type.createProperty("id", Type.INTEGER); |
| 110 | + type.createProperty("vector", Type.ARRAY_OF_FLOATS); |
| 111 | + |
| 112 | + final StringBuilder metadata = new StringBuilder(); |
| 113 | + metadata.append(String.format("\"dimensions\": %d, \"similarity\": \"COSINE\"", DIMENSIONS)); |
| 114 | + if (!"NONE".equals(quantization)) |
| 115 | + metadata.append(String.format(", \"quantization\": \"%s\"", quantization)); |
| 116 | + if (storeVectorsInGraph) |
| 117 | + metadata.append(", \"storeVectorsInGraph\": true"); |
| 118 | + |
| 119 | + db.command("sql", String.format( |
| 120 | + "CREATE INDEX ON VectorData (vector) LSM_VECTOR METADATA { %s }", metadata)); |
| 121 | + }); |
| 122 | + |
| 123 | + int inserted = 0; |
| 124 | + db.begin(); |
| 125 | + for (int i = 0; i < NUM_VECTORS; i++) { |
| 126 | + db.newDocument("VectorData").set("id", i).set("vector", dataVectors[i]).save(); |
| 127 | + inserted++; |
| 128 | + if (inserted % BATCH_SIZE == 0) { |
| 129 | + db.commit(); |
| 130 | + final double elapsed = (System.nanoTime() - ingestStart) / 1e9; |
| 131 | + System.out.printf("\r[%s] Inserted %,d / %,d (%.0f v/s, %.1fs) ", |
| 132 | + label, inserted, NUM_VECTORS, inserted / elapsed, elapsed); |
| 133 | + db.begin(); |
| 134 | + } |
| 135 | + } |
| 136 | + if (db.isTransactionActive()) |
| 137 | + db.commit(); |
| 138 | + } |
| 139 | + |
| 140 | + final double ingestSec = (System.nanoTime() - ingestStart) / 1e9; |
| 141 | + System.out.printf("%n[%s] Ingestion done: %.1fs (%,.0f vectors/sec)%n", |
| 142 | + label, ingestSec, NUM_VECTORS / ingestSec); |
| 143 | + |
| 144 | + // Phase 2: Close + Reopen + Graph build (simulates database restart) |
| 145 | + System.out.printf("[%s] Phase 2: Reopen + graph build...%n", label); |
| 146 | + final long reopenStart = System.nanoTime(); |
| 147 | + |
| 148 | + try (final Database db = factory.open()) { |
| 149 | + final double reopenSec = (System.nanoTime() - reopenStart) / 1e9; |
| 150 | + System.out.printf("[%s] Reopen: %.2fs%n", label, reopenSec); |
| 151 | + |
| 152 | + final LSMVectorIndex index = (LSMVectorIndex) db.getSchema().getType("VectorData") |
| 153 | + .getPolymorphicIndexByProperties("vector").getIndexesOnBuckets()[0]; |
| 154 | + |
| 155 | + // First query triggers graph build |
| 156 | + final long buildStart = System.nanoTime(); |
| 157 | + index.findNeighborsFromVector(queryVectors[0], K, EF_SEARCH); |
| 158 | + final double buildSec = (System.nanoTime() - buildStart) / 1e9; |
| 159 | + |
| 160 | + rt.gc(); |
| 161 | + final long memAfter = rt.totalMemory() - rt.freeMemory(); |
| 162 | + System.out.printf("[%s] Graph build (first query): %.1fs | Memory delta: %d MB%n", |
| 163 | + label, buildSec, (memAfter - memBefore) / (1024 * 1024)); |
| 164 | + |
| 165 | + // Phase 3: Direct API search |
| 166 | + System.out.printf("[%s] Phase 3: Direct API search (k=%d, efSearch=%d)...%n", label, K, EF_SEARCH); |
| 167 | + benchmarkDirectSearch(label, index, queryVectors); |
| 168 | + |
| 169 | + // Phase 4: SQL vectorNeighbors() search (matches what the Python benchmark does) |
| 170 | + System.out.printf("[%s] Phase 4: SQL vectorNeighbors() search...%n", label); |
| 171 | + benchmarkSQLSearch(label, db, queryVectors); |
| 172 | + |
| 173 | + // Phase 5: Print stats |
| 174 | + final Map<String, Long> stats = index.getStats(); |
| 175 | + System.out.printf("[%s] Vector fetch stats: graph=%d doc=%d quantized=%d%n", |
| 176 | + label, |
| 177 | + stats.getOrDefault("vectorFetchFromGraph", 0L), |
| 178 | + stats.getOrDefault("vectorFetchFromDocuments", 0L), |
| 179 | + stats.getOrDefault("vectorFetchFromQuantized", 0L)); |
| 180 | + System.out.printf("[%s] Search ops: %d, total search time: %d ms%n", |
| 181 | + label, |
| 182 | + stats.getOrDefault("searchOperations", 0L), |
| 183 | + stats.getOrDefault("totalSearchLatencyMs", 0L)); |
| 184 | + } |
| 185 | + } |
| 186 | + } |
| 187 | + |
| 188 | + private void benchmarkDirectSearch(final String label, final LSMVectorIndex index, final float[][] queryVectors) { |
| 189 | + // Warmup |
| 190 | + for (int i = 0; i < WARMUP_QUERIES; i++) |
| 191 | + index.findNeighborsFromVector(queryVectors[i % queryVectors.length], K, EF_SEARCH); |
| 192 | + |
| 193 | + // Reset metrics after warmup |
| 194 | + index.getStats(); // just read current values |
| 195 | + |
| 196 | + final long[] latenciesNs = new long[NUM_QUERIES]; |
| 197 | + int totalResults = 0; |
| 198 | + |
| 199 | + for (int q = 0; q < NUM_QUERIES; q++) { |
| 200 | + final long qStart = System.nanoTime(); |
| 201 | + final List<Pair<RID, Float>> results = index.findNeighborsFromVector(queryVectors[q], K, EF_SEARCH); |
| 202 | + latenciesNs[q] = System.nanoTime() - qStart; |
| 203 | + totalResults += results.size(); |
| 204 | + } |
| 205 | + |
| 206 | + printLatencyStats(label + " API", latenciesNs, totalResults); |
| 207 | + } |
| 208 | + |
| 209 | + private void benchmarkSQLSearch(final String label, final Database db, final float[][] queryVectors) { |
| 210 | + // Warmup |
| 211 | + for (int i = 0; i < WARMUP_QUERIES; i++) { |
| 212 | + final String vectorStr = vectorToString(queryVectors[i % queryVectors.length]); |
| 213 | + try (final ResultSet rs = db.query("sql", |
| 214 | + String.format("SELECT vectorNeighbors('VectorData[vector]', %s, %d, %d) as res", vectorStr, K, EF_SEARCH))) { |
| 215 | + while (rs.hasNext()) rs.next(); |
| 216 | + } |
| 217 | + } |
| 218 | + |
| 219 | + final long[] latenciesNs = new long[NUM_QUERIES]; |
| 220 | + int totalResults = 0; |
| 221 | + |
| 222 | + for (int q = 0; q < NUM_QUERIES; q++) { |
| 223 | + final String vectorStr = vectorToString(queryVectors[q]); |
| 224 | + final long qStart = System.nanoTime(); |
| 225 | + try (final ResultSet rs = db.query("sql", |
| 226 | + String.format("SELECT vectorNeighbors('VectorData[vector]', %s, %d, %d) as res", vectorStr, K, EF_SEARCH))) { |
| 227 | + while (rs.hasNext()) { |
| 228 | + final Result row = rs.next(); |
| 229 | + final Object res = row.getProperty("res"); |
| 230 | + if (res instanceof List<?> list) |
| 231 | + totalResults += list.size(); |
| 232 | + } |
| 233 | + } |
| 234 | + latenciesNs[q] = System.nanoTime() - qStart; |
| 235 | + } |
| 236 | + |
| 237 | + printLatencyStats(label + " SQL", latenciesNs, totalResults); |
| 238 | + } |
| 239 | + |
| 240 | + private void printLatencyStats(final String label, final long[] latenciesNs, final int totalResults) { |
| 241 | + Arrays.sort(latenciesNs); |
| 242 | + final double meanMs = Arrays.stream(latenciesNs).average().orElse(0) / 1e6; |
| 243 | + final double p50Ms = latenciesNs[latenciesNs.length / 2] / 1e6; |
| 244 | + final double p95Ms = latenciesNs[(int) (latenciesNs.length * 0.95)] / 1e6; |
| 245 | + final double p99Ms = latenciesNs[(int) (latenciesNs.length * 0.99)] / 1e6; |
| 246 | + final double minMs = latenciesNs[0] / 1e6; |
| 247 | + final double maxMs = latenciesNs[latenciesNs.length - 1] / 1e6; |
| 248 | + final double avgResultsPerQuery = (double) totalResults / latenciesNs.length; |
| 249 | + |
| 250 | + System.out.printf("[%s] Latency: mean=%.2fms p50=%.2fms p95=%.2fms p99=%.2fms min=%.2fms max=%.2fms%n", |
| 251 | + label, meanMs, p50Ms, p95Ms, p99Ms, minMs, maxMs); |
| 252 | + System.out.printf("[%s] Avg results/query: %.1f%n", label, avgResultsPerQuery); |
| 253 | + } |
| 254 | + |
| 255 | + private String vectorToString(final float[] vector) { |
| 256 | + final StringBuilder sb = new StringBuilder("["); |
| 257 | + for (int i = 0; i < vector.length; i++) { |
| 258 | + if (i > 0) |
| 259 | + sb.append(','); |
| 260 | + sb.append(vector[i]); |
| 261 | + } |
| 262 | + sb.append(']'); |
| 263 | + return sb.toString(); |
| 264 | + } |
| 265 | + |
| 266 | + private float[][] generateClusteredVectors(final int count, final int dims, final Random rng) { |
| 267 | + // Generate clustered vectors that mimic real-world embeddings |
| 268 | + final int numClusters = Math.max(50, count / 500); |
| 269 | + final float clusterSpread = 0.15f; |
| 270 | + |
| 271 | + final float[][] centroids = new float[numClusters][dims]; |
| 272 | + for (int c = 0; c < numClusters; c++) { |
| 273 | + float norm = 0; |
| 274 | + for (int d = 0; d < dims; d++) { |
| 275 | + centroids[c][d] = (float) rng.nextGaussian(); |
| 276 | + norm += centroids[c][d] * centroids[c][d]; |
| 277 | + } |
| 278 | + norm = (float) Math.sqrt(norm); |
| 279 | + for (int d = 0; d < dims; d++) |
| 280 | + centroids[c][d] /= norm; |
| 281 | + } |
| 282 | + |
| 283 | + final float[][] vectors = new float[count][dims]; |
| 284 | + for (int i = 0; i < count; i++) { |
| 285 | + final float[] centroid = centroids[i % numClusters]; |
| 286 | + float norm = 0; |
| 287 | + for (int d = 0; d < dims; d++) { |
| 288 | + vectors[i][d] = centroid[d] + (float) (rng.nextGaussian() * clusterSpread); |
| 289 | + norm += vectors[i][d] * vectors[i][d]; |
| 290 | + } |
| 291 | + norm = (float) Math.sqrt(norm); |
| 292 | + for (int d = 0; d < dims; d++) |
| 293 | + vectors[i][d] /= norm; |
| 294 | + } |
| 295 | + return vectors; |
| 296 | + } |
| 297 | +} |
0 commit comments