Skip to content

[9.0] Backporting fix minscore propagation in text similarity reranker #129700

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/129223.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 129223
summary: Fix text similarity reranker does not propagate min score correctly
area: Search
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ public void onFailure(Exception e) {
RankDocsRetrieverBuilder rankDocsRetrieverBuilder = new RankDocsRetrieverBuilder(
rankWindowSize,
newRetrievers.stream().map(s -> s.retriever).toList(),
results::get
results::get,
this.minScore
);
rankDocsRetrieverBuilder.retrieverName(retrieverName());
return rankDocsRetrieverBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@ public class RankDocsRetrieverBuilder extends RetrieverBuilder {
final List<RetrieverBuilder> sources;
final Supplier<RankDoc[]> rankDocs;

public RankDocsRetrieverBuilder(int rankWindowSize, List<RetrieverBuilder> sources, Supplier<RankDoc[]> rankDocs) {
public RankDocsRetrieverBuilder(int rankWindowSize, List<RetrieverBuilder> sources, Supplier<RankDoc[]> rankDocs, Float minScore) {
this.rankWindowSize = rankWindowSize;
this.rankDocs = rankDocs;
if (sources == null || sources.isEmpty()) {
throw new IllegalArgumentException("sources must not be null or empty");
}
this.sources = sources;
this.minScore = minScore;
}

@Override
Expand All @@ -48,7 +49,7 @@ public String getName() {
}

private boolean sourceHasMinScore() {
return minScore != null || sources.stream().anyMatch(x -> x.minScore() != null);
return this.minScore != null || sources.stream().anyMatch(x -> x.minScore() != null);
}

private boolean sourceShouldRewrite(QueryRewriteContext ctx) throws IOException {
Expand Down Expand Up @@ -132,7 +133,7 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder
searchSourceBuilder.size(rankWindowSize);
}
if (sourceHasMinScore()) {
searchSourceBuilder.minScore(this.minScore() == null ? Float.MIN_VALUE : this.minScore());
searchSourceBuilder.minScore(this.minScore == null ? Float.MIN_VALUE : this.minScore);
}
if (searchSourceBuilder.size() + searchSourceBuilder.from() > rankDocResults.length) {
searchSourceBuilder.size(Math.max(0, rankDocResults.length - searchSourceBuilder.from()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ private List<QueryBuilder> preFilters(QueryRewriteContext queryRewriteContext) t
}

private RankDocsRetrieverBuilder createRandomRankDocsRetrieverBuilder(QueryRewriteContext queryRewriteContext) throws IOException {
return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier());
return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier(), null);
}

public void testExtractToSearchSourceBuilder() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public Set<NodeFeature> getTestFeatures() {
SemanticInferenceMetadataFieldsMapper.EXPLICIT_NULL_FIXES,
SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED,
TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX,
TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_MINSCORE_FIX,
SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT,
SEMANTIC_KNN_FILTER_FIX,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

Expand All @@ -39,6 +40,7 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder
public static final NodeFeature TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX = new NodeFeature(
"text_similarity_reranker_alias_handling_fix"
);
public static final NodeFeature TEXT_SIMILARITY_RERANKER_MINSCORE_FIX = new NodeFeature("text_similarity_reranker_minscore_fix");

public static final ParseField RETRIEVER_FIELD = new ParseField("retriever");
public static final ParseField INFERENCE_ID_FIELD = new ParseField("inference_id");
Expand Down Expand Up @@ -141,23 +143,21 @@ protected TextSimilarityRankRetrieverBuilder clone(
protected RankDoc[] combineInnerRetrieverResults(List<ScoreDoc[]> rankResults, boolean explain) {
assert rankResults.size() == 1;
ScoreDoc[] scoreDocs = rankResults.getFirst();
TextSimilarityRankDoc[] textSimilarityRankDocs = new TextSimilarityRankDoc[scoreDocs.length];
List<TextSimilarityRankDoc> filteredDocs = new ArrayList<>();
// Filtering by min_score must be done here, after reranking.
// Applying min_score in the child retriever could prematurely exclude documents that would receive high scores from the reranker.
for (int i = 0; i < scoreDocs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
assert scoreDoc.score >= 0;
if (explain) {
textSimilarityRankDocs[i] = new TextSimilarityRankDoc(
scoreDoc.doc,
scoreDoc.score,
scoreDoc.shardIndex,
inferenceId,
field
);
} else {
textSimilarityRankDocs[i] = new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex);
if (minScore == null || scoreDoc.score >= minScore) {
if (explain) {
filteredDocs.add(new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex, inferenceId, field));
} else {
filteredDocs.add(new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex));
}
}
}
return textSimilarityRankDocs;
return filteredDocs.toArray(new TextSimilarityRankDoc[0]);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,3 +349,111 @@ setup:
- match: { hits.total.value: 1 }
- length: { hits.hits: 1 }
- match: { hits.hits.0._id: "doc_1" }

---
"Text similarity reranker respects min_score":

- requires:
cluster_features: "text_similarity_reranker_minscore_fix"
reason: test min score functionality

- do:
index:
index: test-index
id: doc_2
body:
text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun."
topic: [ "science" ]
subtopic: [ "astronomy" ]
inference_text_field: "10"
refresh: true

- do:
search:
index: test-index
body:
track_total_hits: true
fields: [ "text", "topic" ]
retriever:
text_similarity_reranker:
retriever:
standard:
query:
bool:
should:
- constant_score:
filter:
term: { subtopic: "technology" }
boost: 10
- constant_score:
filter:
term: { subtopic: "astronomy" }
boost: 1
rank_window_size: 10
inference_id: my-rerank-model
inference_text: "How often does the moon hide the sun?"
field: inference_text_field
min_score: 10
size: 10

- match: { hits.total.value: 1 }
- length: { hits.hits: 1 }
- match: { hits.hits.0._id: "doc_2" }

---
"Text similarity reranker with min_score zero includes all docs":

- requires:
cluster_features: "text_similarity_reranker_minscore_fix"
reason: test min score functionality

- do:
search:
index: test-index
body:
track_total_hits: true
fields: [ "text", "topic" ]
retriever:
text_similarity_reranker:
retriever:
standard:
query:
match_all: {}
rank_window_size: 10
inference_id: my-rerank-model
inference_text: "How often does the moon hide the sun?"
field: inference_text_field
min_score: 0
size: 10

- match: { hits.total.value: 3 }
- length: { hits.hits: 3 }

---
"Text similarity reranker with high min_score excludes all docs":

- requires:
cluster_features: "text_similarity_reranker_minscore_fix"
reason: test min score functionality

- do:
search:
index: test-index
body:
track_total_hits: true
fields: [ "text", "topic" ]
retriever:
text_similarity_reranker:
retriever:
standard:
query:
match_all: {}
rank_window_size: 10
inference_id: my-rerank-model
inference_text: "How often does the moon hide the sun?"
field: inference_text_field
min_score: 1000
size: 10

- match: { hits.total.value: 0 }
- length: { hits.hits: 0 }