Skip to content

Commit 440c7f1

Browse files
committed
feat: build vector index by default
1 parent e1e2cfd commit 440c7f1

File tree

2 files changed

+119
-1
lines changed

2 files changed

+119
-1
lines changed

engine/src/main/java/com/arcadedb/query/sql/parser/CreateIndexStatement.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
import com.arcadedb.database.Database;
2424
import com.arcadedb.exception.CommandExecutionException;
2525
import com.arcadedb.exception.CommandSQLParsingException;
26+
import com.arcadedb.index.TypeIndex;
2627
import com.arcadedb.index.lsm.LSMTreeIndexAbstract;
28+
import com.arcadedb.index.vector.LSMVectorIndex;
2729
import com.arcadedb.query.sql.executor.CommandContext;
2830
import com.arcadedb.query.sql.executor.InternalResultSet;
2931
import com.arcadedb.query.sql.executor.Result;
@@ -171,10 +173,20 @@ else if (typeAsString.equalsIgnoreCase("UNIQUE")) {
171173
final Map<String, Object> metadataMap = metadata.toMap((Result) null, context);
172174
final JSONObject jsonMetadata = new JSONObject(metadataMap);
173175

176+
// Extract buildGraphNow directive (default true) before passing metadata to builder
177+
final boolean buildGraphNow = jsonMetadata.getBoolean("buildGraphNow", true);
178+
jsonMetadata.remove("buildGraphNow");
179+
174180
// withLSMVectorType() narrows the builder to a TypeLSMVectorIndexBuilder
175181
final TypeLSMVectorIndexBuilder vectorBuilder = builder.withLSMVectorType();
176182
vectorBuilder.withMetadata(jsonMetadata);
177-
vectorBuilder.create();
183+
final TypeIndex typeIndex = vectorBuilder.create();
184+
185+
// Build the HNSW graph immediately unless explicitly disabled
186+
if (buildGraphNow)
187+
for (final com.arcadedb.index.Index idx : typeIndex.getIndexesOnBuckets())
188+
if (idx instanceof LSMVectorIndex)
189+
((LSMVectorIndex) idx).buildVectorGraphNow();
178190

179191
} else if (indexType == Schema.INDEX_TYPE.FULL_TEXT && metadata != null) {
180192
// Handle full-text index metadata
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* Copyright © 2021-present Arcade Data Ltd (info@arcadedata.com)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-FileCopyrightText: 2021-present Arcade Data Ltd (info@arcadedata.com)
17+
* SPDX-License-Identifier: Apache-2.0
18+
*/
19+
package com.arcadedb.index.vector;
20+
21+
import com.arcadedb.TestHelper;
22+
import com.arcadedb.query.sql.executor.ResultSet;
23+
import org.junit.jupiter.api.Test;
24+
25+
import java.util.List;
26+
import java.util.Map;
27+
28+
import static org.assertj.core.api.Assertions.assertThat;
29+
30+
/**
31+
* Tests that CREATE INDEX ... LSM_VECTOR builds the HNSW graph immediately by default,
32+
* so that vector.neighbors queries work right after index creation without needing
33+
* an explicit buildVectorGraphNow() call.
34+
*
35+
* @author Luca Garulli (l.garulli@arcadedata.com)
36+
*/
37+
class VectorIndexBuildGraphOnCreateTest extends TestHelper {
38+
39+
@Test
40+
void buildGraphImmediatelyByDefault() {
41+
// Insert data BEFORE creating the index
42+
database.transaction(() -> {
43+
database.command("sql", "CREATE VERTEX TYPE Movie");
44+
database.command("sql", "CREATE PROPERTY Movie.title STRING");
45+
database.command("sql", "CREATE PROPERTY Movie.embedding ARRAY_OF_FLOATS");
46+
47+
database.command("sql", "INSERT INTO Movie SET title = 'A', embedding = [1.0, 0.0, 0.0, 0.0]");
48+
database.command("sql", "INSERT INTO Movie SET title = 'B', embedding = [0.9, 0.1, 0.0, 0.0]");
49+
database.command("sql", "INSERT INTO Movie SET title = 'C', embedding = [0.0, 1.0, 0.0, 0.0]");
50+
database.command("sql", "INSERT INTO Movie SET title = 'D', embedding = [0.0, 0.0, 1.0, 0.0]");
51+
database.command("sql", "INSERT INTO Movie SET title = 'E', embedding = [0.0, 0.0, 0.0, 1.0]");
52+
});
53+
54+
// Create vector index — graph should be built immediately (default behavior)
55+
database.command("sql", """
56+
CREATE INDEX ON Movie (embedding) LSM_VECTOR
57+
METADATA {
58+
"dimensions": 4,
59+
"similarity": "COSINE"
60+
}""");
61+
62+
// Query should return results immediately without any manual buildVectorGraphNow()
63+
try (final ResultSet rs = database.query("sql",
64+
"SELECT `vector.neighbors`('Movie[embedding]', ?, 3) as neighbors FROM Movie LIMIT 1",
65+
new float[] { 1.0f, 0.0f, 0.0f, 0.0f })) {
66+
assertThat(rs.hasNext()).isTrue();
67+
final Object neighbors = rs.next().getProperty("neighbors");
68+
assertThat(neighbors).isNotNull();
69+
@SuppressWarnings("unchecked")
70+
final List<Map<String, Object>> neighborList = (List<Map<String, Object>>) neighbors;
71+
assertThat(neighborList).isNotEmpty();
72+
assertThat(neighborList.size()).isLessThanOrEqualTo(3);
73+
}
74+
}
75+
76+
@Test
77+
void buildGraphDisabledExplicitly() {
78+
// Insert data BEFORE creating the index
79+
database.transaction(() -> {
80+
database.command("sql", "CREATE VERTEX TYPE Item");
81+
database.command("sql", "CREATE PROPERTY Item.embedding ARRAY_OF_FLOATS");
82+
83+
database.command("sql", "INSERT INTO Item SET embedding = [1.0, 0.0, 0.0]");
84+
database.command("sql", "INSERT INTO Item SET embedding = [0.0, 1.0, 0.0]");
85+
database.command("sql", "INSERT INTO Item SET embedding = [0.0, 0.0, 1.0]");
86+
});
87+
88+
// Create vector index with buildGraphNow: false — graph should NOT be built
89+
database.command("sql", """
90+
CREATE INDEX ON Item (embedding) LSM_VECTOR
91+
METADATA {
92+
"dimensions": 3,
93+
"similarity": "COSINE",
94+
"buildGraphNow": false
95+
}""");
96+
97+
// Query should still eventually work (lazy build on first search),
98+
// but the graph was not built eagerly at CREATE INDEX time
99+
// We verify the index exists and is queryable
100+
try (final ResultSet rs = database.query("sql",
101+
"SELECT `vector.neighbors`('Item[embedding]', ?, 2) as neighbors FROM Item LIMIT 1",
102+
new float[] { 1.0f, 0.0f, 0.0f })) {
103+
assertThat(rs.hasNext()).isTrue();
104+
}
105+
}
106+
}

0 commit comments

Comments
 (0)