Move `glob_to_regex` and `re_word_boundary` to `matrix-python-common` (#11505)

преди 2 години · a77c369897
--- a/changelog.d/11505.misc
+++ b/changelog.d/11505.misc
@@ -0,0 +1 @@
 Move `glob_to_regex` and `re_word_boundary` to `matrix-python-common`.
--- a/synapse/config/room_directory.py
+++ b/synapse/config/room_directory.py
@@ -15,8 +15,9 @@

 from typing import List

 from matrix_common.regex import glob_to_regex

 from synapse.types import JsonDict
 from synapse.util import glob_to_regex

 from ._base import Config, ConfigError

--- a/synapse/config/tls.py
+++ b/synapse/config/tls.py
@@ -16,11 +16,12 @@ import logging
 import os
 from typing import List, Optional, Pattern

 from matrix_common.regex import glob_to_regex

 from OpenSSL import SSL, crypto
 from twisted.internet._sslverify import Certificate, trustRootFromCertificates

 from synapse.config._base import Config, ConfigError
 from synapse.util import glob_to_regex

 logger = logging.getLogger(__name__)

--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -28,6 +28,7 @@ from typing import (
    Union,
 )

 from matrix_common.regex import glob_to_regex
 from prometheus_client import Counter, Gauge, Histogram

 from twisted.internet import defer
@@ -66,7 +67,7 @@ from synapse.replication.http.federation import (
 )
 from synapse.storage.databases.main.lock import Lock
 from synapse.types import JsonDict, get_domain_from_id
 from synapse.util import glob_to_regex, json_decoder, unwrapFirstError
 from synapse.util import json_decoder, unwrapFirstError
 from synapse.util.async_helpers import Linearizer, concurrently_execute
 from synapse.util.caches.response_cache import ResponseCache
 from synapse.util.stringutils import parse_server_name
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -17,9 +17,10 @@ import logging
 import re
 from typing import Any, Dict, List, Optional, Pattern, Tuple, Union

 from matrix_common.regex import glob_to_regex, to_word_pattern

 from synapse.events import EventBase
 from synapse.types import JsonDict, UserID
 from synapse.util import glob_to_regex, re_word_boundary
 from synapse.util.caches.lrucache import LruCache

 logger = logging.getLogger(__name__)
@@ -184,7 +185,7 @@ class PushRuleEvaluatorForEvent:
        r = regex_cache.get((display_name, False, True), None)
        if not r:
            r1 = re.escape(display_name)
            r1 = re_word_boundary(r1)
            r1 = to_word_pattern(r1)
            r = re.compile(r1, flags=re.IGNORECASE)
            regex_cache[(display_name, False, True)] = r

@@ -213,7 +214,7 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool:
    try:
        r = regex_cache.get((glob, True, word_boundary), None)
        if not r:
            r = glob_to_regex(glob, word_boundary)
            r = glob_to_regex(glob, word_boundary=word_boundary)
            regex_cache[(glob, True, word_boundary)] = r
        return bool(r.search(value))
    except re.error:
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -87,6 +87,7 @@ REQUIREMENTS = [
    # with the latest security patches.
    "cryptography>=3.4.7",
    "ijson>=3.1",
    "matrix-common==1.0.0",
 ]

 CONDITIONAL_REQUIREMENTS = {
--- a/synapse/util/__init__.py
+++ b/synapse/util/__init__.py
@@ -14,9 +14,8 @@

 import json
 import logging
 import re
 import typing
 from typing import Any, Callable, Dict, Generator, Optional, Pattern
 from typing import Any, Callable, Dict, Generator, Optional

 import attr
 from frozendict import frozendict
@@ -35,9 +34,6 @@ if typing.TYPE_CHECKING:
 logger = logging.getLogger(__name__)


 _WILDCARD_RUN = re.compile(r"([\?\*]+)")


 def _reject_invalid_json(val: Any) -> None:
    """Do not allow Infinity, -Infinity, or NaN values in JSON."""
    raise ValueError("Invalid JSON value: '%s'" % val)
@@ -185,56 +181,3 @@ def log_failure(
    if not consumeErrors:
        return failure
    return None


 def glob_to_regex(glob: str, word_boundary: bool = False) -> Pattern:
    """Compile a glob into a case-insensitive regular expression.

    Args:
        glob: the glob to translate; `?` and `*` are the only wildcards, and
           everything else is matched literally.
        word_boundary: when True, the result is wrapped with `re_word_boundary`
           so it may match anywhere in the searched string. When False, the
           result is anchored to the start and end of the string.

    Returns:
        the compiled pattern
    """

    def _translate(piece: str) -> str:
        # Literal text between wildcard runs (including the empty string) is
        # matched verbatim.
        if not _WILDCARD_RUN.match(piece):
            return re.escape(piece)

        # A run of wildcards collapses into one bounded repetition, which
        # avoids performance cliffs: `?**?**?` means the same as `???*`, and
        # `???*` is the regex `.{3,}`.
        minimum = piece.count("?")
        template = ".{%d,}" if "*" in piece else ".{%d}"
        return template % minimum

    # Splitting on a capturing group keeps the wildcard runs in the output,
    # interleaved with the literal text around them.
    body = "".join(_translate(piece) for piece in _WILDCARD_RUN.split(glob))

    if not word_boundary:
        # \A anchors at start of string, \Z at end of string
        return re.compile(r"\A" + body + r"\Z", re.IGNORECASE)

    return re.compile(re_word_boundary(body), re.IGNORECASE)


 def re_word_boundary(r: str) -> str:
    """
    Wrap the regex fragment `r` so that it only matches as a whole word.

    The fragment must be preceded by the start of the string or a non-word
    character, and followed by a non-word character or the end of the string.
    Those delimiter characters, when present, are consumed as part of the
    match. This also works for fragments that themselves begin or end with
    non-word characters.
    """
    # `\b` is deliberately not used: it was found to misbehave on unicode
    # input, whereas delimiting with `\W` (or a string edge) behaves as wanted.
    return rf"(^|\W){r}(\W|$)"
--- a/tests/util/test_glob_to_regex.py
+++ b/tests/util/test_glob_to_regex.py
@@ -1,59 +0,0 @@
 # Copyright 2021 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from synapse.util import glob_to_regex

 from tests.unittest import TestCase


 class GlobToRegexTestCase(TestCase):
    """Exercises the regexes that `glob_to_regex` builds in its default, anchored mode."""

    def _check_matches(self, regex, cases):
        """Run `regex.match` over `cases`, a list of (candidate, should_match, message)."""
        for candidate, should_match, message in cases:
            verify = self.assertTrue if should_match else self.assertFalse
            verify(regex.match(candidate), message)

    def test_literal_match(self):
        """A glob without wildcards matches only the whole string, ignoring case."""
        literal = glob_to_regex("foobaz")
        self._check_matches(
            literal,
            [
                ("FoobaZ", True, "patterns should match and be case-insensitive"),
                ("x foobaz", False, "pattern should not match at word boundaries"),
            ],
        )

    def test_wildcard_match(self):
        """`?` stands for exactly one character and `*` for any number of them."""
        wildcard = glob_to_regex("f?o*baz")
        self._check_matches(
            wildcard,
            [
                (
                    "FoobarbaZ",
                    True,
                    "* should match string and pattern should be case-insensitive",
                ),
                ("foobaz", True, "* should match 0 characters"),
                ("fooxaz", False, "the character after * must match"),
                ("fobbaz", False, "? should not match 0 characters"),
                ("fiiobaz", False, "? should not match 2 characters"),
            ],
        )

    def test_multi_wildcard(self):
        """Runs of adjacent wildcards are collapsed into a single repetition."""
        double_star = glob_to_regex("**baz")
        self._check_matches(
            double_star,
            [
                ("agsgsbaz", True, "** should match any string"),
                ("baz", True, "** should match the empty string"),
            ],
        )
        self.assertEqual(double_star.pattern, r"\A.{0,}baz\Z")

        star_then_qmark = glob_to_regex("*?baz")
        self._check_matches(
            star_then_qmark,
            [
                ("agsgsbaz", True, "*? should match any string"),
                ("abaz", True, "*? should match a single char"),
                ("baz", False, "*? should not match the empty string"),
            ],
        )
        self.assertEqual(star_then_qmark.pattern, r"\A.{1,}baz\Z")

        interleaved = glob_to_regex("a?*?*?baz")
        self._check_matches(
            interleaved,
            [
                ("a g baz", True, "?*?*? should match 3 chars"),
                ("a..baz", False, "?*?*? should not match 2 chars"),
                ("a.gg.baz", True, "?*?*? should match 4 chars"),
            ],
        )
        self.assertEqual(interleaved.pattern, r"\Aa.{3,}baz\Z")