Skip to content

Commit f95b14f

Browse files
[8.19] Fix behavior for _index LIKE for ESQL (#130849) (#131055)
* Fix behavior for _index LIKE for ESQL (#130849) Fixes _index LIKE <pattern> to always have normal text matching semantics. Implement a generic ExpressionQuery and ExpressionQueryBuilder that can be serialized to the data node. Then the ExpressionQueryBuilder can build an Automaton using TranslationAware.asLuceneQuery() and execute it in Lucene. Introduces a breaking change for LIKE on _index fields. The old LIKE behavior is not correct and does not have the normal LIKE semantics of ESQL. Customers upgrading from an old build to a new build might see a regression where the data changes because the LIKE filters on clustering produce different results, but the new results are correct. Behavior for ESQL New CCS to New => New behavior everywhere Old CCS to New => Old behavior everywhere (the isForESQL flag is not passed in from old) New CCS to Old => New behavior for new, old behavior for old (the isForESQL cannot be passed, old does not know about it). Old CCS to Old => Old behavior everywhere Closes #129511 (cherry picked from commit 8c4eaf9) # Conflicts: # benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java # server/src/main/java/org/elasticsearch/TransportVersions.java # server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java # server/src/main/java/org/elasticsearch/index/query/WildcardQueryBuilder.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/DataNodeComputeHandler.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java # 
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateInlineEvalsTests.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java * Prepare for backport to 9.1 and 8.19 * Address code review feedback * Transport version backport * Fix merge errors
1 parent cc0ea38 commit f95b14f

File tree

90 files changed

+1108
-326
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

90 files changed

+1108
-326
lines changed

docs/changelog/130849.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 130849
2+
summary: Fix behavior for `_index` LIKE for ESQL
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 129511

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ static TransportVersion def(int id) {
256256
public static final TransportVersion ESQL_DOCUMENTS_FOUND_AND_VALUES_LOADED_8_19 = def(8_841_0_61);
257257
public static final TransportVersion ESQL_PROFILE_INCLUDE_PLAN_8_19 = def(8_841_0_62);
258258
public static final TransportVersion ESQL_SPLIT_ON_BIG_VALUES_8_19 = def(8_841_0_63);
259+
public static final TransportVersion ESQL_FIXED_INDEX_LIKE_8_19 = def(8_841_0_64);
259260

260261
/*
261262
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/index/mapper/ConstantFieldType.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import org.apache.lucene.search.MultiTermQuery;
1616
import org.apache.lucene.search.Query;
1717
import org.apache.lucene.util.BytesRef;
18+
import org.apache.lucene.util.automaton.Automaton;
19+
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
1820
import org.elasticsearch.common.lucene.search.Queries;
1921
import org.elasticsearch.common.regex.Regex;
2022
import org.elasticsearch.core.Nullable;
@@ -23,6 +25,7 @@
2325

2426
import java.util.Collection;
2527
import java.util.Map;
28+
import java.util.function.Supplier;
2629

2730
/**
2831
* A {@link MappedFieldType} that has the same value for all documents.
@@ -135,9 +138,47 @@ public final Query wildcardQuery(String value, boolean caseInsensitive, QueryRew
135138
}
136139
}
137140

141+
/**
142+
* Returns a query that matches all documents or no documents.
143+
* It usually calls {@link #wildcardQuery(String, boolean, QueryRewriteContext)}
144+
* except for IndexFieldType which overrides this method to use its own matching logic.
145+
*/
146+
public Query wildcardLikeQuery(String value, boolean caseInsensitive, QueryRewriteContext context) {
147+
return wildcardQuery(value, caseInsensitive, context);
148+
}
149+
138150
@Override
139151
public final boolean fieldHasValue(FieldInfos fieldInfos) {
140152
// We consider constant field types to always have value.
141153
return true;
142154
}
155+
156+
/**
157+
* Returns the constant value of this field as a string.
158+
* Based on the field type, we need to get it in a different way.
159+
*/
160+
public abstract String getConstantFieldValue(SearchExecutionContext context);
161+
162+
/**
163+
* Returns a query that matches all documents or no documents
164+
* depending on whether the constant value of this field matches or not
165+
*/
166+
@Override
167+
public Query automatonQuery(
168+
Supplier<Automaton> automatonSupplier,
169+
Supplier<CharacterRunAutomaton> characterRunAutomatonSupplier,
170+
@Nullable MultiTermQuery.RewriteMethod method,
171+
SearchExecutionContext context,
172+
String description
173+
) {
174+
CharacterRunAutomaton compiled = characterRunAutomatonSupplier.get();
175+
boolean matches = compiled.run(getConstantFieldValue(context));
176+
if (matches) {
177+
return new MatchAllDocsQuery();
178+
} else {
179+
return new MatchNoDocsQuery(
180+
"The \"" + context.getFullyQualifiedIndex().getName() + "\" query was rewritten to a \"match_none\" query."
181+
);
182+
}
183+
}
143184
}

server/src/main/java/org/elasticsearch/index/mapper/IndexFieldMapper.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,13 @@
1010
package org.elasticsearch.index.mapper;
1111

1212
import org.apache.lucene.search.MatchAllDocsQuery;
13+
import org.apache.lucene.search.MatchNoDocsQuery;
14+
import org.apache.lucene.search.MultiTermQuery;
1315
import org.apache.lucene.search.Query;
1416
import org.apache.lucene.util.BytesRef;
1517
import org.elasticsearch.common.Strings;
18+
import org.elasticsearch.common.regex.Regex;
19+
import org.elasticsearch.core.Nullable;
1620
import org.elasticsearch.index.fielddata.FieldData;
1721
import org.elasticsearch.index.fielddata.FieldDataContext;
1822
import org.elasticsearch.index.fielddata.IndexFieldData;
@@ -27,6 +31,7 @@
2731

2832
import java.util.Collections;
2933
import java.util.List;
34+
import java.util.Locale;
3035

3136
public class IndexFieldMapper extends MetadataFieldMapper {
3237

@@ -102,6 +107,38 @@ public StoredFieldsSpec storedFieldsSpec() {
102107
};
103108
}
104109

110+
@Override
111+
public Query wildcardLikeQuery(
112+
String value,
113+
@Nullable MultiTermQuery.RewriteMethod method,
114+
boolean caseInsensitve,
115+
SearchExecutionContext context
116+
) {
117+
String indexName = context.getFullyQualifiedIndex().getName();
118+
return getWildcardLikeQuery(value, caseInsensitve, indexName);
119+
}
120+
121+
@Override
122+
public Query wildcardLikeQuery(String value, boolean caseInsensitive, QueryRewriteContext context) {
123+
String indexName = context.getFullyQualifiedIndex().getName();
124+
return getWildcardLikeQuery(value, caseInsensitive, indexName);
125+
}
126+
127+
private static Query getWildcardLikeQuery(String value, boolean caseInsensitve, String indexName) {
128+
if (caseInsensitve) {
129+
value = value.toLowerCase(Locale.ROOT);
130+
indexName = indexName.toLowerCase(Locale.ROOT);
131+
}
132+
if (Regex.simpleMatch(value, indexName)) {
133+
return new MatchAllDocsQuery();
134+
}
135+
return new MatchNoDocsQuery("The \"" + indexName + "\" query was rewritten to a \"match_none\" query.");
136+
}
137+
138+
@Override
139+
public String getConstantFieldValue(SearchExecutionContext context) {
140+
return context.getFullyQualifiedIndex().getName();
141+
}
105142
}
106143

107144
public IndexFieldMapper() {

server/src/main/java/org/elasticsearch/index/mapper/IndexModeFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ protected boolean matches(String pattern, boolean caseInsensitive, QueryRewriteC
6060
return Regex.simpleMatch(pattern, indexMode, caseInsensitive);
6161
}
6262

63+
@Override
64+
public String getConstantFieldValue(SearchExecutionContext context) {
65+
return context.getIndexSettings().getMode().getName();
66+
}
67+
6368
@Override
6469
public Query existsQuery(SearchExecutionContext context) {
6570
return new MatchAllDocsQuery();

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@
2323
import org.apache.lucene.index.IndexReader;
2424
import org.apache.lucene.index.LeafReaderContext;
2525
import org.apache.lucene.index.MultiTerms;
26+
import org.apache.lucene.index.Term;
2627
import org.apache.lucene.index.Terms;
2728
import org.apache.lucene.index.TermsEnum;
2829
import org.apache.lucene.search.MultiTermQuery;
2930
import org.apache.lucene.search.Query;
3031
import org.apache.lucene.util.BytesRef;
3132
import org.apache.lucene.util.automaton.Automata;
3233
import org.apache.lucene.util.automaton.Automaton;
34+
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
3335
import org.apache.lucene.util.automaton.CompiledAutomaton;
3436
import org.apache.lucene.util.automaton.CompiledAutomaton.AUTOMATON_TYPE;
3537
import org.apache.lucene.util.automaton.MinimizationOperations;
@@ -50,6 +52,7 @@
5052
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
5153
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
5254
import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
55+
import org.elasticsearch.index.query.AutomatonQueryWithDescription;
5356
import org.elasticsearch.index.query.SearchExecutionContext;
5457
import org.elasticsearch.index.similarity.SimilarityProvider;
5558
import org.elasticsearch.script.Script;
@@ -81,6 +84,7 @@
8184
import java.util.Map;
8285
import java.util.Objects;
8386
import java.util.Set;
87+
import java.util.function.Supplier;
8488

8589
import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH;
8690
import static org.elasticsearch.core.Strings.format;
@@ -930,6 +934,17 @@ public boolean hasScriptValues() {
930934
public boolean hasNormalizer() {
931935
return normalizer != Lucene.KEYWORD_ANALYZER;
932936
}
937+
938+
@Override
939+
public Query automatonQuery(
940+
Supplier<Automaton> automatonSupplier,
941+
Supplier<CharacterRunAutomaton> characterRunAutomatonSupplier,
942+
@Nullable MultiTermQuery.RewriteMethod method,
943+
SearchExecutionContext context,
944+
String description
945+
) {
946+
return new AutomatonQueryWithDescription(new Term(name()), automatonSupplier.get(), description);
947+
}
933948
}
934949

935950
private final boolean indexed;

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
import org.apache.lucene.search.TermInSetQuery;
3030
import org.apache.lucene.search.TermQuery;
3131
import org.apache.lucene.util.BytesRef;
32+
import org.apache.lucene.util.automaton.Automaton;
33+
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
3234
import org.elasticsearch.ElasticsearchException;
3335
import org.elasticsearch.ElasticsearchParseException;
3436
import org.elasticsearch.cluster.metadata.IndexMetadata;
@@ -58,6 +60,7 @@
5860
import java.util.Objects;
5961
import java.util.Set;
6062
import java.util.function.Function;
63+
import java.util.function.Supplier;
6164

6265
import static org.elasticsearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES;
6366

@@ -333,6 +336,19 @@ public final Query wildcardQuery(String value, @Nullable MultiTermQuery.RewriteM
333336
return wildcardQuery(value, method, false, context);
334337
}
335338

339+
/**
340+
* Similar to wildcardQuery, except that we change the behavior for ESQL
341+
* to behave like a string LIKE query, where the value is matched as a string
342+
*/
343+
public Query wildcardLikeQuery(
344+
String value,
345+
@Nullable MultiTermQuery.RewriteMethod method,
346+
boolean caseInsensitve,
347+
SearchExecutionContext context
348+
) {
349+
return wildcardQuery(value, method, caseInsensitve, context);
350+
}
351+
336352
public Query wildcardQuery(
337353
String value,
338354
@Nullable MultiTermQuery.RewriteMethod method,
@@ -374,6 +390,23 @@ public Query regexpQuery(
374390
);
375391
}
376392

393+
/**
394+
* Returns a Lucene-pushable Query for the current field.
395+
* For now can only be AutomatonQuery or MatchAllDocsQuery() or MatchNoDocsQuery()
396+
*/
397+
public Query automatonQuery(
398+
Supplier<Automaton> automatonSupplier,
399+
Supplier<CharacterRunAutomaton> characterRunAutomatonSupplier,
400+
@Nullable MultiTermQuery.RewriteMethod method,
401+
SearchExecutionContext context,
402+
String description
403+
) {
404+
throw new QueryShardException(
405+
context,
406+
"Can only use automaton queries on keyword fields - not on [" + name + "] which is of type [" + typeName() + "]"
407+
);
408+
}
409+
377410
public Query existsQuery(SearchExecutionContext context) {
378411
if (hasDocValues() || getTextSearchInfo().hasNorms()) {
379412
return new FieldExistsQuery(name());

server/src/main/java/org/elasticsearch/index/query/AutomatonQueryBuilder.java

Lines changed: 0 additions & 108 deletions
This file was deleted.

0 commit comments

Comments
 (0)