Skip to content

[9.1] Fix match_only_text keyword multi-field bug (#131383) #131467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -253,11 +252,18 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
if (searchExecutionContext.isSourceSynthetic() && withinMultiField) {
String parentField = searchExecutionContext.parentPath(name());
var parent = searchExecutionContext.lookup().fieldType(parentField);
if (parent.isStored()) {
if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
&& keywordParent.ignoreAbove() != Integer.MAX_VALUE) {

if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
&& keywordParent.ignoreAbove() != Integer.MAX_VALUE) {
if (parent.isStored()) {
return storedFieldFetcher(parentField, keywordParent.originalName());
} else if (parent.hasDocValues()) {
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(keywordParent.originalName()));
}
}

if (parent.isStored()) {
return storedFieldFetcher(parentField);
} else if (parent.hasDocValues()) {
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
Expand All @@ -268,14 +274,21 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
} else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);

if (kwd != null) {
var fieldType = kwd.fieldType();
if (fieldType.isStored()) {
if (fieldType.ignoreAbove() != Integer.MAX_VALUE) {

if (fieldType.ignoreAbove() != Integer.MAX_VALUE) {
if (fieldType.isStored()) {
return storedFieldFetcher(fieldType.name(), fieldType.originalName());
} else {
return storedFieldFetcher(fieldType.name());
} else if (fieldType.hasDocValues()) {
var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fieldType.originalName()));
}
}

if (fieldType.isStored()) {
return storedFieldFetcher(fieldType.name());
} else if (fieldType.hasDocValues()) {
var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
return docValuesFieldFetcher(ifd);
Expand Down Expand Up @@ -332,7 +345,42 @@ private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IO
if (names.length == 1) {
return storedFields.get(names[0]);
}
return Arrays.stream(names).map(storedFields::get).filter(Objects::nonNull).flatMap(List::stream).toList();

List<Object> values = new ArrayList<>();
for (var name : names) {
var currValues = storedFields.get(name);
if (currValues != null) {
values.addAll(currValues);
}
}

return values;
};
};
}

private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> combineFieldFetchers(
IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> primaryFetcher,
IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> secondaryFetcher
) {
return context -> {
var primaryGetter = primaryFetcher.apply(context);
var secondaryGetter = secondaryFetcher.apply(context);
return docId -> {
List<Object> values = new ArrayList<>();
var primary = primaryGetter.apply(docId);
if (primary != null) {
values.addAll(primary);
}

var secondary = secondaryGetter.apply(docId);
if (secondary != null) {
values.addAll(secondary);
}

assert primary != null || secondary != null;

return values;
};
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,50 @@ synthetic_source match_only_text as multi-field:
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"

---
synthetic_source match_only_text as multi-field with ignored keyword as parent:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"

- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: keyword
store: false
doc_values: true
ignore_above: 10
fields:
text:
type: match_only_text

- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: [ "Apache Lucene powers Elasticsearch", "Apache" ]

- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo.text: apache lucene

- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch" ]

---
synthetic_source match_only_text as multi-field with stored keyword as parent:
- requires:
Expand Down Expand Up @@ -562,6 +606,50 @@ synthetic_source match_only_text with multi-field:
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"

---
synthetic_source match_only_text with ignored multi-field:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"

- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: match_only_text
fields:
raw:
type: keyword
store: false
doc_values: true
ignore_above: 10

- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "Apache Lucene powers Elasticsearch"

- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo: apache lucene

- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"

---
synthetic_source match_only_text with stored multi-field:
- requires:
Expand Down