Skip to content

ESQL: Workaround for RLike handling of empty lang pattern #128895

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/128895.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 128895
summary: Workaround for RLike handling of empty lang pattern
area: ES|QL
type: bug
issues:
- 128813
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ public boolean matchesAll() {

/**
 * Returns the one string this pattern matches exactly, or {@code null} when no such string is reported.
 * <p>
 * An automaton with zero states is treated as matching nothing and yields {@code null} without
 * consulting {@code Operations.getSingleton}; otherwise the singleton's code points (if any) are
 * converted to a {@link String}.
 *
 * @return the exact-match string, or {@code null} if the automaton is empty or has no singleton
 */
@Override
public String exactMatch() {
// NOTE(review): in this diff rendering the next line is the pre-change statement; the lines that
// follow it (through the second `IntsRef singleton` declaration) are its replacement.
IntsRef singleton = Operations.getSingleton(automaton());
Automaton a = automaton();
if (a.getNumStates() == 0) { // workaround for https://github.com/elastic/elasticsearch/pull/128887
return null; // Empty automaton has no matches
}
IntsRef singleton = Operations.getSingleton(a);
// getSingleton yields null when there is no single accepted string; propagate that as null
return singleton != null ? UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length) : null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ private boolean rlikeMatchesAll(String pattern) {
return rlike(pattern).matchesAll();
}

/**
 * Builds the RLIKE pattern for {@code pattern} and returns whatever its {@code exactMatch()} reports,
 * which may be {@code null}.
 */
private String exactMatchRLike(String pattern) {
    var rlikePattern = rlike(pattern);
    return rlikePattern.exactMatch();
}

/**
 * Returns {@code true} when the exact-match string reported for the RLIKE {@code pattern} equals the
 * pattern text itself; a {@code null} exact match therefore yields {@code false}.
 */
private boolean rlikeExactMatch(String pattern) {
// NOTE(review): in this diff rendering the first return is the pre-change line and the second is
// its replacement, which routes through the exactMatchRLike helper.
return pattern.equals(rlike(pattern).exactMatch());
return pattern.equals(exactMatchRLike(pattern));
}

public void testWildcardMatchAll() {
Expand Down Expand Up @@ -86,4 +90,20 @@ public void testRegexExactMatch() {
assertTrue(rlikeExactMatch("abc"));
assertTrue(rlikeExactMatch("12345"));
}

/**
 * Verifies what {@code exactMatch()} reports for RLIKE patterns that contain the empty-language
 * operator {@code #}.
 */
public void testRegexExactMatchWithEmptyMatch() {
// As soon as there's one unconditional `#` in the pattern, it'll match nothing
assertNull(exactMatchRLike("#"));
assertNull(exactMatchRLike("##"));
assertNull(exactMatchRLike("#foo"));
assertNull(exactMatchRLike("#foo#"));
assertNull(exactMatchRLike("f#oo"));
assertNull(exactMatchRLike("foo#"));
assertNull(exactMatchRLike("#[A-Z]*"));
assertNull(exactMatchRLike("foo(#)"));

// When the `#` is optional or only one side of an alternation, an exact match is still reported
assertNotNull(exactMatchRLike("foo#?"));
assertNotNull(exactMatchRLike("#|foo"));
assertNotNull(exactMatchRLike("foo|#"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ public static String stripThrough(String input) {
/** Returns the input string, but with parts of it having the letter casing changed. */
public static String randomCasing(String input) {
StringBuilder sb = new StringBuilder(input.length());
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen), chunkEnd; i < inputLen; i += step) {
chunkEnd = Math.min(i + step, inputLen);
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen); i < inputLen; i += step) {
var chunkEnd = Math.min(i + step, inputLen);
var chunk = input.substring(i, chunkEnd);
sb.append(randomBoolean() ? chunk.toLowerCase(Locale.ROOT) : chunk.toUpperCase(Locale.ROOT));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1440,6 +1440,46 @@ public void testReplaceStringCasingWithInsensitiveWildcardMatch() throws IOExcep
assertThat(answer.get("values"), equalTo(List.of(List.of("_\"_$_(_)_+_._[_]_^_{_|_}___", "_#_&_<_>___"))));
}

/**
 * End-to-end check of RLIKE with the empty-language pattern {@code #}.
 * <p>
 * Creates an index with a single keyword field, indexes the values {@code #} and {@code foo#bar},
 * then asserts that an unescaped {@code #} pattern returns no rows (with and without
 * {@code TO_LOWER} around the field) while an escaped {@code #} matches the character literally.
 *
 * @throws IOException if creating the index or performing a request fails
 */
public void testRLikeHandlingOfEmptyLanguagePattern() throws IOException {
createIndex(testIndexName(), Settings.EMPTY, """
{
"properties": {
"field": {
"type": "keyword"
}
}
}
""");
// one document per value; each request carries refresh=true, presumably so the queries below see it
for (var val : List.of("#", "foo#bar")) {
Request doc = new Request("POST", testIndexName() + "/_doc?refresh=true");
doc.setJsonEntity("""
{
"field": "%s"
}
""".formatted(val));
client().performRequest(doc);
}
// pushed down, matches nothing
var query = "FROM " + testIndexName() + " | WHERE TO_LOWER(field) RLIKE \"#\"";
var answer = runEsql(requestObjectBuilder().query(query));
assertThat(answer.get("values"), equalTo(List.of()));

// matches nothing
query = "FROM " + testIndexName() + " | WHERE field RLIKE \"#\"";
answer = runEsql(requestObjectBuilder().query(query));
assertThat(answer.get("values"), equalTo(List.of()));

// matches one doc
// (the `#` is backslash-escaped in the query text, so only the value "#" is expected)
query = "FROM " + testIndexName() + " | WHERE field RLIKE \"\\\\#\"";
answer = runEsql(requestObjectBuilder().query(query));
assertThat(answer.get("values"), equalTo(List.of(List.of("#"))));

// matches both docs
// (SORT field makes the expected row order deterministic)
query = "FROM " + testIndexName() + " | WHERE field RLIKE \".*\\\\#.*\" | SORT field";
answer = runEsql(requestObjectBuilder().query(query));
assertThat(answer.get("values"), equalTo(List.of(List.of("#"), List.of("foo#bar"))));
}

protected static Request prepareRequestWithOptions(RequestObjectBuilder requestObject, Mode mode) throws IOException {
requestObject.build();
Request request = prepareRequest(mode);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,13 @@ emp_no:integer |first_name:keyword
10001 |Georgi
10055 |Georgy
;

# test for https://github.com/elastic/elasticsearch/issues/128813
rlikeWithEmptyLanguagePattern
required_capability: rlike_with_empty_language_pattern
ROW x = "abc" | EVAL bool = x RLIKE "#"
;

x:keyword | bool:boolean
abc | false
;
Original file line number Diff line number Diff line change
Expand Up @@ -1177,6 +1177,11 @@ public enum Cap {
*/
ENABLE_LOOKUP_JOIN_ON_ALIASES,

/**
* Allows RLIKE to correctly handle the "empty language" flag, `#`.
*/
RLIKE_WITH_EMPTY_LANGUAGE_PATTERN,

/**
* MATCH PHRASE function
*/
Expand Down
Loading