Skip to content

Commit 59b114c

Browse files
Fix match_only_text keyword multi-field bug (#131383) (#131466)
In #131314 we fixed match_only_text fields that are paired with a keyword field using ignore_above (with the keyword as either the parent field or a multi-field), for the case where the keyword field is stored. However, the issue is still present when the keyword field is not stored but has doc values instead. This patch fixes that case.
1 parent 3a2862d commit 59b114c

File tree

2 files changed

+145
-9
lines changed

2 files changed

+145
-9
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@
6767
import java.io.IOException;
6868
import java.io.UncheckedIOException;
6969
import java.util.ArrayList;
70-
import java.util.Arrays;
7170
import java.util.Collections;
7271
import java.util.List;
7372
import java.util.Map;
@@ -247,11 +246,18 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
247246
if (searchExecutionContext.isSourceSynthetic() && withinMultiField) {
248247
String parentField = searchExecutionContext.parentPath(name());
249248
var parent = searchExecutionContext.lookup().fieldType(parentField);
250-
if (parent.isStored()) {
251-
if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
252-
&& keywordParent.ignoreAbove() != Integer.MAX_VALUE) {
249+
250+
if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
251+
&& keywordParent.ignoreAbove() != Integer.MAX_VALUE) {
252+
if (parent.isStored()) {
253253
return storedFieldFetcher(parentField, keywordParent.originalName());
254+
} else if (parent.hasDocValues()) {
255+
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
256+
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(keywordParent.originalName()));
254257
}
258+
}
259+
260+
if (parent.isStored()) {
255261
return storedFieldFetcher(parentField);
256262
} else if (parent.hasDocValues()) {
257263
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
@@ -262,14 +268,21 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
262268
} else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
263269
var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
264270
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);
271+
265272
if (kwd != null) {
266273
var fieldType = kwd.fieldType();
267-
if (fieldType.isStored()) {
268-
if (fieldType.ignoreAbove() != Integer.MAX_VALUE) {
274+
275+
if (fieldType.ignoreAbove() != Integer.MAX_VALUE) {
276+
if (fieldType.isStored()) {
269277
return storedFieldFetcher(fieldType.name(), fieldType.originalName());
270-
} else {
271-
return storedFieldFetcher(fieldType.name());
278+
} else if (fieldType.hasDocValues()) {
279+
var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
280+
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fieldType.originalName()));
272281
}
282+
}
283+
284+
if (fieldType.isStored()) {
285+
return storedFieldFetcher(fieldType.name());
273286
} else if (fieldType.hasDocValues()) {
274287
var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
275288
return docValuesFieldFetcher(ifd);
@@ -326,7 +339,42 @@ private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IO
326339
if (names.length == 1) {
327340
return storedFields.get(names[0]);
328341
}
329-
return Arrays.stream(names).map(storedFields::get).filter(Objects::nonNull).flatMap(List::stream).toList();
342+
343+
List<Object> values = new ArrayList<>();
344+
for (var name : names) {
345+
var currValues = storedFields.get(name);
346+
if (currValues != null) {
347+
values.addAll(currValues);
348+
}
349+
}
350+
351+
return values;
352+
};
353+
};
354+
}
355+
356+
/**
 * Builds a fetcher that returns the values of two other fetchers, concatenated per document.
 *
 * For each leaf reader context, both fetchers are applied once to obtain their per-document
 * getters. For each doc id, the primary getter's values are added first, followed by the
 * secondary getter's values; a {@code null} result from either getter is skipped.
 *
 * The callers visible in this change pass a doc-values fetcher for the keyword field as the
 * primary and a stored-field fetcher for the keyword's {@code originalName()} as the secondary.
 * NOTE(review): presumably the {@code originalName()} stored field holds the values skipped by
 * {@code ignore_above} - confirm against KeywordFieldMapper.
 *
 * @param primaryFetcher   fetcher whose values come first in the combined list
 * @param secondaryFetcher fetcher whose values are appended after the primary values
 * @return a fetcher producing a newly allocated list holding the primary then secondary values
 */
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> combineFieldFetchers(
357+
IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> primaryFetcher,
358+
IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> secondaryFetcher
359+
) {
360+
return context -> {
361+
// Resolve both per-leaf getters once, outside the per-document lambda.
var primaryGetter = primaryFetcher.apply(context);
362+
var secondaryGetter = secondaryFetcher.apply(context);
363+
return docId -> {
364+
List<Object> values = new ArrayList<>();
365+
var primary = primaryGetter.apply(docId);
366+
if (primary != null) {
367+
values.addAll(primary);
368+
}
369+
370+
var secondary = secondaryGetter.apply(docId);
371+
if (secondary != null) {
372+
values.addAll(secondary);
373+
}
374+
375+
// At least one of the two sources is expected to yield a (non-null) result for the document.
assert primary != null || secondary != null;
376+
377+
return values;
330378
};
331379
};
332380
}

modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,50 @@ synthetic_source match_only_text as multi-field:
435435
- match:
436436
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
437437

438+
---
439+
synthetic_source match_only_text as multi-field with ignored keyword as parent:
440+
- requires:
441+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
442+
reason: "Source mode configured through index setting"
443+
444+
- do:
445+
indices.create:
446+
index: synthetic_source_test
447+
body:
448+
settings:
449+
index:
450+
mapping.source.mode: synthetic
451+
mappings:
452+
properties:
453+
foo:
454+
type: keyword
455+
store: false
456+
doc_values: true
457+
ignore_above: 10
458+
fields:
459+
text:
460+
type: match_only_text
461+
462+
- do:
463+
index:
464+
index: synthetic_source_test
465+
id: "1"
466+
refresh: true
467+
body:
468+
foo: [ "Apache Lucene powers Elasticsearch", "Apache" ]
469+
470+
- do:
471+
search:
472+
index: synthetic_source_test
473+
body:
474+
query:
475+
match_phrase:
476+
foo.text: apache lucene
477+
478+
- match: { "hits.total.value": 1 }
479+
- match:
480+
hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch" ]
481+
438482
---
439483
synthetic_source match_only_text as multi-field with stored keyword as parent:
440484
- requires:
@@ -562,6 +606,50 @@ synthetic_source match_only_text with multi-field:
562606
- match:
563607
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
564608

609+
---
610+
synthetic_source match_only_text with ignored multi-field:
611+
- requires:
612+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
613+
reason: "Source mode configured through index setting"
614+
615+
- do:
616+
indices.create:
617+
index: synthetic_source_test
618+
body:
619+
settings:
620+
index:
621+
mapping.source.mode: synthetic
622+
mappings:
623+
properties:
624+
foo:
625+
type: match_only_text
626+
fields:
627+
raw:
628+
type: keyword
629+
store: false
630+
doc_values: true
631+
ignore_above: 10
632+
633+
- do:
634+
index:
635+
index: synthetic_source_test
636+
id: "1"
637+
refresh: true
638+
body:
639+
foo: "Apache Lucene powers Elasticsearch"
640+
641+
- do:
642+
search:
643+
index: synthetic_source_test
644+
body:
645+
query:
646+
match_phrase:
647+
foo: apache lucene
648+
649+
- match: { "hits.total.value": 1 }
650+
- match:
651+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
652+
565653
---
566654
synthetic_source match_only_text with stored multi-field:
567655
- requires:

0 commit comments

Comments
 (0)