Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 088d748

Browse files
authored
Revert "Move glob_to_regex and re_word_boundary to matrix-python-common (#11505)" (#11527)
This reverts commit a77c369.
1 parent 14d593f commit 088d748

File tree

8 files changed

+124
-12
lines changed

8 files changed

+124
-12
lines changed

changelog.d/11527.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Temporarily revert usage of `matrix-python-common`.

synapse/config/room_directory.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@
1515

1616
from typing import List
1717

18-
from matrix_common.regex import glob_to_regex
19-
2018
from synapse.types import JsonDict
19+
from synapse.util import glob_to_regex
2120

2221
from ._base import Config, ConfigError
2322

synapse/config/tls.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,11 @@
1616
import os
1717
from typing import List, Optional, Pattern
1818

19-
from matrix_common.regex import glob_to_regex
20-
2119
from OpenSSL import SSL, crypto
2220
from twisted.internet._sslverify import Certificate, trustRootFromCertificates
2321

2422
from synapse.config._base import Config, ConfigError
23+
from synapse.util import glob_to_regex
2524

2625
logger = logging.getLogger(__name__)
2726

synapse/federation/federation_server.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
Union,
2929
)
3030

31-
from matrix_common.regex import glob_to_regex
3231
from prometheus_client import Counter, Gauge, Histogram
3332

3433
from twisted.internet import defer
@@ -67,7 +66,7 @@
6766
)
6867
from synapse.storage.databases.main.lock import Lock
6968
from synapse.types import JsonDict, get_domain_from_id
70-
from synapse.util import json_decoder, unwrapFirstError
69+
from synapse.util import glob_to_regex, json_decoder, unwrapFirstError
7170
from synapse.util.async_helpers import Linearizer, concurrently_execute
7271
from synapse.util.caches.response_cache import ResponseCache
7372
from synapse.util.stringutils import parse_server_name

synapse/push/push_rule_evaluator.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,9 @@
1717
import re
1818
from typing import Any, Dict, List, Optional, Pattern, Tuple, Union
1919

20-
from matrix_common.regex import glob_to_regex, to_word_pattern
21-
2220
from synapse.events import EventBase
2321
from synapse.types import JsonDict, UserID
22+
from synapse.util import glob_to_regex, re_word_boundary
2423
from synapse.util.caches.lrucache import LruCache
2524

2625
logger = logging.getLogger(__name__)
@@ -185,7 +184,7 @@ def _contains_display_name(self, display_name: Optional[str]) -> bool:
185184
r = regex_cache.get((display_name, False, True), None)
186185
if not r:
187186
r1 = re.escape(display_name)
188-
r1 = to_word_pattern(r1)
187+
r1 = re_word_boundary(r1)
189188
r = re.compile(r1, flags=re.IGNORECASE)
190189
regex_cache[(display_name, False, True)] = r
191190

@@ -214,7 +213,7 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool:
214213
try:
215214
r = regex_cache.get((glob, True, word_boundary), None)
216215
if not r:
217-
r = glob_to_regex(glob, word_boundary=word_boundary)
216+
r = glob_to_regex(glob, word_boundary)
218217
regex_cache[(glob, True, word_boundary)] = r
219218
return bool(r.search(value))
220219
except re.error:

synapse/python_dependencies.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,6 @@
8787
# with the latest security patches.
8888
"cryptography>=3.4.7",
8989
"ijson>=3.1",
90-
"matrix-common==1.0.0",
9190
]
9291

9392
CONDITIONAL_REQUIREMENTS = {

synapse/util/__init__.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@
1414

1515
import json
1616
import logging
17+
import re
1718
import typing
18-
from typing import Any, Callable, Dict, Generator, Optional
19+
from typing import Any, Callable, Dict, Generator, Optional, Pattern
1920

2021
import attr
2122
from frozendict import frozendict
@@ -34,6 +35,9 @@
3435
logger = logging.getLogger(__name__)
3536

3637

38+
_WILDCARD_RUN = re.compile(r"([\?\*]+)")
39+
40+
3741
def _reject_invalid_json(val: Any) -> None:
3842
"""Do not allow Infinity, -Infinity, or NaN values in JSON."""
3943
raise ValueError("Invalid JSON value: '%s'" % val)
@@ -181,3 +185,56 @@ def log_failure(
181185
if not consumeErrors:
182186
return failure
183187
return None
188+
189+
190+
def glob_to_regex(glob: str, word_boundary: bool = False) -> Pattern:
    """Compile a glob pattern into a case-insensitive regular expression.

    In the glob, `?` stands for exactly one character and `*` for any number
    of characters (including none). Everything else is matched literally.

    Args:
        glob: the glob pattern to translate.
        word_boundary: when True, the resulting regex may match anywhere in
            the string, provided the match is delimited by word boundaries.
            When False, the regex is anchored to both the start and the end
            of the string.

    Returns:
        The compiled regex pattern.
    """

    def _translate(piece: str) -> str:
        # Literal text between wildcard runs only needs escaping.
        if _WILDCARD_RUN.match(piece) is None:
            return re.escape(piece)

        # A run of wildcards is collapsed into a single quantifier so that the
        # regex engine never sees adjacent unbounded repeats (which would be a
        # performance cliff):
        #  - the glob `?**?**?` is equivalent to the glob `???*`
        #  - the glob `???*` is equivalent to the regex `.{3,}`
        required = piece.count("?")
        quantifier = ".{%d,}" if "*" in piece else ".{%d}"
        return quantifier % required

    body = "".join(_translate(piece) for piece in _WILDCARD_RUN.split(glob))

    if word_boundary:
        pattern = re_word_boundary(body)
    else:
        # \A anchors at the start of the string, \Z at the end of the string.
        pattern = r"\A" + body + r"\Z"

    return re.compile(pattern, re.IGNORECASE)
229+
230+
231+
def re_word_boundary(r: str) -> str:
    """
    Adds word boundary guards to the start and end of an expression to
    require that the match occur as a whole word, but do so respecting the
    fact that strings starting or ending with non-word characters will
    change word boundaries.

    Args:
        r: the regular expression source to wrap. It is inserted verbatim
            (it is NOT escaped here), inside a non-capturing group.

    Returns:
        The source of a regex which matches `r` only when it is preceded by
        the start of the string or a non-word character, and followed by a
        non-word character or the end of the string.
    """
    # `\b` is not used here: it only matches between a word character and a
    # non-word character, so it would never match next to an expression that
    # itself starts or ends with a non-word character (e.g. `@room`).
    # `(^|\W)` / `(\W|$)` do not have that problem. Note that for `str`
    # patterns `\W` is Unicode-aware: it matches anything that is not a
    # Unicode word character, not merely `[^0-9A-Za-z_]`.
    #
    # The expression is wrapped in `(?:...)` so that a top-level alternation
    # inside `r` cannot escape the boundary guards (`a|b` would otherwise
    # become `(^|\W)a|b(\W|$)`). The group is non-capturing, so the numbering
    # of the existing groups is unchanged.
    return r"(^|\W)(?:%s)(\W|$)" % (r,)

tests/util/test_glob_to_regex.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Copyright 2021 The Matrix.org Foundation C.I.C.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from synapse.util import glob_to_regex
15+
16+
from tests.unittest import TestCase
17+
18+
19+
class GlobToRegexTestCase(TestCase):
    """Exercises `glob_to_regex` with literal, wildcard and multi-wildcard globs."""

    def test_literal_match(self):
        """A wildcard-free glob matches the whole string, ignoring case."""
        regex = glob_to_regex("foobaz")

        matched = regex.match("FoobaZ")
        self.assertTrue(matched, "patterns should match and be case-insensitive")

        # Without word_boundary the regex is anchored, so extra leading text
        # must prevent a match.
        matched = regex.match("x foobaz")
        self.assertFalse(matched, "pattern should not match at word boundaries")

    def test_wildcard_match(self):
        """`?` consumes exactly one character; `*` consumes any run of characters."""
        regex = glob_to_regex("f?o*baz")

        accepted = (
            (
                "FoobarbaZ",
                "* should match string and pattern should be case-insensitive",
            ),
            ("foobaz", "* should match 0 characters"),
        )
        rejected = (
            ("fooxaz", "the character after * must match"),
            ("fobbaz", "? should not match 0 characters"),
            ("fiiobaz", "? should not match 2 characters"),
        )

        for candidate, message in accepted:
            self.assertTrue(regex.match(candidate), message)
        for candidate, message in rejected:
            self.assertFalse(regex.match(candidate), message)

    def test_multi_wildcard(self):
        """Runs of adjacent wildcards are collapsed into a single quantifier."""
        # Only stars: any string, including the empty one.
        stars_only = glob_to_regex("**baz")
        self.assertTrue(stars_only.match("agsgsbaz"), "** should match any string")
        self.assertTrue(stars_only.match("baz"), "** should match the empty string")
        self.assertEqual(stars_only.pattern, r"\A.{0,}baz\Z")

        # A star plus one question mark: at least one character.
        star_and_one = glob_to_regex("*?baz")
        self.assertTrue(star_and_one.match("agsgsbaz"), "*? should match any string")
        self.assertTrue(star_and_one.match("abaz"), "*? should match a single char")
        self.assertFalse(
            star_and_one.match("baz"), "*? should not match the empty string"
        )
        self.assertEqual(star_and_one.pattern, r"\A.{1,}baz\Z")

        # Three question marks interleaved with stars: at least three characters.
        interleaved = glob_to_regex("a?*?*?baz")
        self.assertTrue(interleaved.match("a g baz"), "?*?*? should match 3 chars")
        self.assertFalse(
            interleaved.match("a..baz"), "?*?*? should not match 2 chars"
        )
        self.assertTrue(interleaved.match("a.gg.baz"), "?*?*? should match 4 chars")
        self.assertEqual(interleaved.pattern, r"\Aa.{3,}baz\Z")

0 commit comments

Comments
 (0)