Skip to content

Commit 47d4b98

Browse files
authored
IVF Hierarchical KMeans Flush & Merge (#128675)
added hierarchical kmeans as a clustering algorithm to better partition the space when running IVF on flush and merge
1 parent 1e13409 commit 47d4b98

File tree

17 files changed

+1089
-1227
lines changed

17 files changed

+1089
-1227
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,12 +276,11 @@ TopDocs doVectorQuery(byte[] vector, IndexSearcher searcher) throws IOException
276276
TopDocs doVectorQuery(float[] vector, IndexSearcher searcher) throws IOException {
277277
Query knnQuery;
278278
int topK = this.topK;
279-
int efSearch = this.efSearch;
280279
if (overSamplingFactor > 1f) {
281280
// oversample the topK results to get more candidates for the final result
282281
topK = (int) Math.ceil(topK * overSamplingFactor);
283-
efSearch = Math.max(topK, efSearch);
284282
}
283+
int efSearch = Math.max(topK, this.efSearch);
285284
if (indexType == KnnIndexTester.IndexType.IVF) {
286285
knnQuery = new IVFKnnFloatVectorQuery(VECTOR_FIELD, vector, topK, efSearch, null, nProbe);
287286
} else {
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.vectors;
11+
12+
import org.apache.lucene.internal.hppc.IntArrayList;
13+
14+
final class CentroidAssignments {
15+
16+
private final int numCentroids;
17+
private final float[][] cachedCentroids;
18+
private final IntArrayList[] assignmentsByCluster;
19+
20+
private CentroidAssignments(int numCentroids, float[][] cachedCentroids, IntArrayList[] assignmentsByCluster) {
21+
this.numCentroids = numCentroids;
22+
this.cachedCentroids = cachedCentroids;
23+
this.assignmentsByCluster = assignmentsByCluster;
24+
}
25+
26+
CentroidAssignments(float[][] centroids, IntArrayList[] assignmentsByCluster) {
27+
this(centroids.length, centroids, assignmentsByCluster);
28+
}
29+
30+
CentroidAssignments(int numCentroids, IntArrayList[] assignmentsByCluster) {
31+
this(numCentroids, null, assignmentsByCluster);
32+
}
33+
34+
// Getters
35+
public int numCentroids() {
36+
return numCentroids;
37+
}
38+
39+
public float[][] cachedCentroids() {
40+
return cachedCentroids;
41+
}
42+
43+
public IntArrayList[] assignmentsByCluster() {
44+
return assignmentsByCluster;
45+
}
46+
}

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -112,15 +112,6 @@ public float score(int centroidOrdinal) throws IOException {
112112
};
113113
}
114114

115-
@Override
116-
protected FloatVectorValues getCentroids(IndexInput indexInput, int numCentroids, FieldInfo info) {
117-
FieldEntry entry = fields.get(info.number);
118-
if (entry == null) {
119-
return null;
120-
}
121-
return new OffHeapCentroidFloatVectorValues(numCentroids, indexInput, info.getVectorDimension());
122-
}
123-
124115
@Override
125116
NeighborQueue scorePostingLists(FieldInfo fieldInfo, KnnCollector knnCollector, CentroidQueryScorer centroidQueryScorer, int nProbe)
126117
throws IOException {

0 commit comments

Comments
 (0)