Skip to content

Commit 7dcbb0f

Browse files
bpintea authored and valeriy42 committed
ESQL: Workaround for RLike handling of empty lang pattern (elastic#128895)
Lucene's `org.apache.lucene.util.automaton.Operations#getSingleton` fails when given the Automaton built from a `REGEXP_EMPTY` `RegExp` (the "empty language" pattern, `#`). This adds a workaround: check whether the automaton has zero states, and return no exact match in that case, before calling into the failing method. Closes elastic#128813
1 parent 1dd1f61 commit 7dcbb0f

File tree

7 files changed

+89
-4
lines changed

7 files changed

+89
-4
lines changed

docs/changelog/128895.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 128895
2+
summary: Workaround for RLike handling of empty lang pattern
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 128813

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/AbstractStringPattern.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ public boolean matchesAll() {
3232

3333
@Override
3434
public String exactMatch() {
35-
IntsRef singleton = Operations.getSingleton(automaton());
35+
Automaton a = automaton();
36+
if (a.getNumStates() == 0) { // workaround for https://github.com/elastic/elasticsearch/pull/128887
37+
return null; // Empty automaton has no matches
38+
}
39+
IntsRef singleton = Operations.getSingleton(a);
3640
return singleton != null ? UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length) : null;
3741
}
3842
}

x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/StringPatternTests.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,12 @@ private boolean rlikeMatchesAll(String pattern) {
3333
return rlike(pattern).matchesAll();
3434
}
3535

36+
private String exactMatchRLike(String pattern) {
37+
return rlike(pattern).exactMatch();
38+
}
39+
3640
private boolean rlikeExactMatch(String pattern) {
37-
return pattern.equals(rlike(pattern).exactMatch());
41+
return pattern.equals(exactMatchRLike(pattern));
3842
}
3943

4044
public void testWildcardMatchAll() {
@@ -86,4 +90,20 @@ public void testRegexExactMatch() {
8690
assertTrue(rlikeExactMatch("abc"));
8791
assertTrue(rlikeExactMatch("12345"));
8892
}
93+
94+
public void testRegexExactMatchWithEmptyMatch() {
95+
// As soon as there's one unconditional `#` in the pattern, it'll match nothing
96+
assertNull(exactMatchRLike("#"));
97+
assertNull(exactMatchRLike("##"));
98+
assertNull(exactMatchRLike("#foo"));
99+
assertNull(exactMatchRLike("#foo#"));
100+
assertNull(exactMatchRLike("f#oo"));
101+
assertNull(exactMatchRLike("foo#"));
102+
assertNull(exactMatchRLike("#[A-Z]*"));
103+
assertNull(exactMatchRLike("foo(#)"));
104+
105+
assertNotNull(exactMatchRLike("foo#?"));
106+
assertNotNull(exactMatchRLike("#|foo"));
107+
assertNotNull(exactMatchRLike("foo|#"));
108+
}
89109
}

x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/TestUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ public static String stripThrough(String input) {
6666
/** Returns the input string, but with parts of it having the letter casing changed. */
6767
public static String randomCasing(String input) {
6868
StringBuilder sb = new StringBuilder(input.length());
69-
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen), chunkEnd; i < inputLen; i += step) {
70-
chunkEnd = Math.min(i + step, inputLen);
69+
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen); i < inputLen; i += step) {
70+
var chunkEnd = Math.min(i + step, inputLen);
7171
var chunk = input.substring(i, chunkEnd);
7272
sb.append(randomBoolean() ? chunk.toLowerCase(Locale.ROOT) : chunk.toUpperCase(Locale.ROOT));
7373
}

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1440,6 +1440,46 @@ public void testReplaceStringCasingWithInsensitiveWildcardMatch() throws IOExcep
14401440
assertThat(answer.get("values"), equalTo(List.of(List.of("_\"_$_(_)_+_._[_]_^_{_|_}___", "_#_&_<_>___"))));
14411441
}
14421442

1443+
public void testRLikeHandlingOfEmptyLanguagePattern() throws IOException {
1444+
createIndex(testIndexName(), Settings.EMPTY, """
1445+
{
1446+
"properties": {
1447+
"field": {
1448+
"type": "keyword"
1449+
}
1450+
}
1451+
}
1452+
""");
1453+
for (var val : List.of("#", "foo#bar")) {
1454+
Request doc = new Request("POST", testIndexName() + "/_doc?refresh=true");
1455+
doc.setJsonEntity("""
1456+
{
1457+
"field": "%s"
1458+
}
1459+
""".formatted(val));
1460+
client().performRequest(doc);
1461+
}
1462+
// pushed down, matches nothing
1463+
var query = "FROM " + testIndexName() + " | WHERE TO_LOWER(field) RLIKE \"#\"";
1464+
var answer = runEsql(requestObjectBuilder().query(query));
1465+
assertThat(answer.get("values"), equalTo(List.of()));
1466+
1467+
// matches nothing
1468+
query = "FROM " + testIndexName() + " | WHERE field RLIKE \"#\"";
1469+
answer = runEsql(requestObjectBuilder().query(query));
1470+
assertThat(answer.get("values"), equalTo(List.of()));
1471+
1472+
// matches one doc
1473+
query = "FROM " + testIndexName() + " | WHERE field RLIKE \"\\\\#\"";
1474+
answer = runEsql(requestObjectBuilder().query(query));
1475+
assertThat(answer.get("values"), equalTo(List.of(List.of("#"))));
1476+
1477+
// matches both docs
1478+
query = "FROM " + testIndexName() + " | WHERE field RLIKE \".*\\\\#.*\" | SORT field";
1479+
answer = runEsql(requestObjectBuilder().query(query));
1480+
assertThat(answer.get("values"), equalTo(List.of(List.of("#"), List.of("foo#bar"))));
1481+
}
1482+
14431483
protected static Request prepareRequestWithOptions(RequestObjectBuilder requestObject, Mode mode) throws IOException {
14441484
requestObject.build();
14451485
Request request = prepareRequest(mode);

x-pack/plugin/esql/qa/testFixtures/src/main/resources/where-like.csv-spec

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,3 +423,13 @@ emp_no:integer |first_name:keyword
423423
10001 |Georgi
424424
10055 |Georgy
425425
;
426+
427+
# test for https://github.com/elastic/elasticsearch/issues/128813
428+
rlikeWithEmptyLanguagePattern
429+
required_capability: rlike_with_empty_language_pattern
430+
ROW x = "abc" | EVAL bool = x RLIKE "#"
431+
;
432+
433+
x:keyword | bool:boolean
434+
abc | false
435+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1177,6 +1177,11 @@ public enum Cap {
11771177
*/
11781178
ENABLE_LOOKUP_JOIN_ON_ALIASES,
11791179

1180+
/**
1181+
* Allows RLIKE to correctly handle the "empty language" flag, `#`.
1182+
*/
1183+
RLIKE_WITH_EMPTY_LANGUAGE_PATTERN,
1184+
11801185
/**
11811186
* MATCH PHRASE function
11821187
*/

0 commit comments

Comments
 (0)