Skip to content

[3.12] gh-106752: Sync with zipp 3.16.2 (GH-106757) #106777

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 80 additions & 1 deletion Lib/test/test_zipfile/_path/test_complexity.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import unittest
import io
import itertools
import math
import re
import string
import unittest
import zipfile

from ._functools import compose
Expand All @@ -9,9 +13,11 @@


big_o = import_or_skip('big_o')
pytest = import_or_skip('pytest')


class TestComplexity(unittest.TestCase):
@pytest.mark.flaky
def test_implied_dirs_performance(self):
best, others = big_o.big_o(
compose(consume, zipfile.CompleteDirs._implied_dirs),
Expand All @@ -22,3 +28,76 @@ def test_implied_dirs_performance(self):
min_n=1,
)
assert best <= big_o.complexities.Linear

def make_zip_path(self, depth=1, width=1) -> zipfile.Path:
    """
    Build an in-memory zip and return a Path rooted at it.

    One empty ``.txt`` entry is written for every combination of a
    directory prefix from ``make_deep_paths(depth)`` and a file stem
    from ``make_names(width)``.
    """
    archive = zipfile.ZipFile(io.BytesIO(), mode='w')
    directories = tuple(self.make_deep_paths(depth))
    stems = tuple(self.make_names(width))
    for directory in directories:
        for stem in stems:
            archive.writestr(f"{directory}{stem}.txt", b'')
    # The archive lives in memory, so give it a name explicitly.
    archive.filename = "big un.zip"
    return zipfile.Path(archive)

@classmethod
def make_names(cls, width, letters=string.ascii_lowercase):
    """
    Lazily produce ``width`` distinct, equal-length names from ``letters``.

    >>> list(TestComplexity.make_names(1))
    ['a']
    >>> list(TestComplexity.make_names(2))
    ['a', 'b']
    >>> list(TestComplexity.make_names(30))
    ['aa', 'ab', ..., 'bd']
    """
    # Determine how many letters per name are needed to produce width
    # names. log(1) is 0, which would yield a single empty name, so
    # always use at least one letter.
    n_products = max(1, math.ceil(math.log(width, len(letters))))
    inputs = (letters,) * n_products
    combinations = itertools.product(*inputs)
    names = map(''.join, combinations)
    return itertools.islice(names, width)

@classmethod
def make_deep_paths(cls, depth):
    """
    Lazily yield the directory prefix for each level below ``depth``,
    as produced by ``make_deep_path`` for 0, 1, ..., depth - 1.
    """
    return (cls.make_deep_path(level) for level in range(depth))

@classmethod
def make_deep_path(cls, depth):
    """
    Return a directory prefix consisting of ``depth`` nested ``d/``
    components (the empty string when ``depth`` is 0).
    """
    return 'd/' * depth

def test_baseline_regex_complexity(self):
    """
    Baseline: matching the regex for ``*.txt`` against ever deeper
    paths should take constant time.
    """
    # In a raw string a single backslash escapes the dot. Writing
    # ``\\.`` here would instead demand a literal backslash followed
    # by any character, which is not the regex glob uses for ``*.txt``.
    pattern = r'[^/]*\.txt'
    best, others = big_o.big_o(
        lambda path: re.fullmatch(pattern, path),
        self.make_deep_path,
        max_n=100,
        min_n=1,
    )
    assert best <= big_o.complexities.Constant

@pytest.mark.flaky
def test_glob_depth(self):
    """
    Globbing ``*.txt`` should scale no worse than quadratically as
    the depth of the archive grows.
    """

    def run_glob(path):
        return consume(path.glob('*.txt'))

    # make_zip_path receives the measured size as its depth argument.
    best, _others = big_o.big_o(
        run_glob,
        self.make_zip_path,
        max_n=100,
        min_n=1,
    )
    assert best <= big_o.complexities.Quadratic

@pytest.mark.flaky
def test_glob_width(self):
    """
    Globbing ``*.txt`` should scale no worse than linearly as the
    number of files per level grows.
    """

    def run_glob(path):
        return consume(path.glob('*.txt'))

    def wide_zip(size):
        return self.make_zip_path(width=size)

    best, _others = big_o.big_o(
        run_glob,
        wide_zip,
        max_n=100,
        min_n=1,
    )
    assert best <= big_o.complexities.Linear

@pytest.mark.flaky
def test_glob_width_and_depth(self):
    """
    Globbing ``*.txt`` should be measured as no worse than linear
    when depth and width grow together.
    """

    def run_glob(path):
        return consume(path.glob('*.txt'))

    def square_zip(size):
        return self.make_zip_path(depth=size, width=size)

    best, _others = big_o.big_o(
        run_glob,
        square_zip,
        max_n=10,
        min_n=1,
    )
    assert best <= big_o.complexities.Linear
70 changes: 62 additions & 8 deletions Lib/test/test_zipfile/_path/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,21 @@ def build_alpharep_fixture():
│ ├── d
│ │ └── e.txt
│ └── f.txt
└── g
└── h
└── i.txt
├── g
│ └── h
│ └── i.txt
└── j
├── k.bin
├── l.baz
└── m.bar

This fixture has the following key characteristics:

- a file at the root (a)
- a file two levels deep (b/d/e)
- multiple files in a directory (b/c, b/f)
- a directory containing only a directory (g/h)
- a directory with files of different extensions (j/klm)

"alpha" because it uses alphabet
"rep" because it's a representative example
Expand All @@ -62,6 +67,9 @@ def build_alpharep_fixture():
zf.writestr("b/d/e.txt", b"content of e")
zf.writestr("b/f.txt", b"content of f")
zf.writestr("g/h/i.txt", b"content of i")
zf.writestr("j/k.bin", b"content of k")
zf.writestr("j/l.baz", b"content of l")
zf.writestr("j/m.bar", b"content of m")
zf.filename = "alpharep.zip"
return zf

Expand Down Expand Up @@ -92,7 +100,7 @@ def zipfile_ondisk(self, alpharep):
def test_iterdir_and_types(self, alpharep):
root = zipfile.Path(alpharep)
assert root.is_dir()
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
assert a.is_file()
assert b.is_dir()
assert g.is_dir()
Expand All @@ -112,7 +120,7 @@ def test_is_file_missing(self, alpharep):
@pass_alpharep
def test_iterdir_on_file(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
with self.assertRaises(ValueError):
a.iterdir()

Expand All @@ -127,7 +135,7 @@ def test_subdir_is_dir(self, alpharep):
@pass_alpharep
def test_open(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
with a.open(encoding="utf-8") as strm:
data = strm.read()
self.assertEqual(data, "content of a")
Expand Down Expand Up @@ -229,7 +237,7 @@ def test_open_missing_directory(self):
@pass_alpharep
def test_read(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
assert a.read_text(encoding="utf-8") == "content of a"
# Also check positional encoding arg (gh-101144).
assert a.read_text("utf-8") == "content of a"
Expand Down Expand Up @@ -295,7 +303,7 @@ def test_mutability(self, alpharep):
reflect that change.
"""
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
alpharep.writestr('foo.txt', 'foo')
alpharep.writestr('bar/baz.txt', 'baz')
assert any(child.name == 'foo.txt' for child in root.iterdir())
Expand Down Expand Up @@ -394,6 +402,13 @@ def test_suffixes(self, alpharep):
e = root / '.hgrc'
assert e.suffixes == []

@pass_alpharep
def test_suffix_no_filename(self, alpharep):
    """
    A child path reports an empty suffix and no suffixes even when
    the underlying zip file has no filename.
    """
    alpharep.filename = None
    child = zipfile.Path(alpharep).joinpath('example')
    assert child.suffix == ""
    assert child.suffixes == []

@pass_alpharep
def test_stem(self, alpharep):
"""
Expand All @@ -411,6 +426,8 @@ def test_stem(self, alpharep):
d = root / "d"
assert d.stem == "d"

assert (root / ".gitignore").stem == ".gitignore"

@pass_alpharep
def test_root_parent(self, alpharep):
root = zipfile.Path(alpharep)
Expand Down Expand Up @@ -442,12 +459,49 @@ def test_match_and_glob(self, alpharep):
assert not root.match("*.txt")

assert list(root.glob("b/c.*")) == [zipfile.Path(alpharep, "b/c.txt")]
assert list(root.glob("b/*.txt")) == [
zipfile.Path(alpharep, "b/c.txt"),
zipfile.Path(alpharep, "b/f.txt"),
]

@pass_alpharep
def test_glob_recursive(self, alpharep):
    """
    Every result of ``**/*.txt`` matches ``*.txt``, and ``rglob`` is
    equivalent to a ``**/``-prefixed ``glob``.
    """
    root = zipfile.Path(alpharep)
    for found in root.glob("**/*.txt"):
        assert found.match("*.txt")

    via_glob = list(root.glob("**/*.txt"))
    via_rglob = list(root.rglob("*.txt"))
    assert via_glob == via_rglob

@pass_alpharep
def test_glob_subdirs(self, alpharep):
    """
    ``*/i.txt`` finds nothing from the root with ``glob`` but finds
    ``g/h/i.txt`` with ``rglob``.
    """
    root = zipfile.Path(alpharep)
    nested = zipfile.Path(alpharep, "g/h/i.txt")

    shallow_hits = list(root.glob("*/i.txt"))
    assert shallow_hits == []

    deep_hits = list(root.rglob("*/i.txt"))
    assert deep_hits == [nested]

@pass_alpharep
def test_glob_does_not_overmatch_dot(self, alpharep):
    """
    The ``.`` in a pattern is literal: ``*.xt`` must not match the
    ``.txt`` files in the fixture.
    """
    root = zipfile.Path(alpharep)
    hits = list(root.glob("*.xt"))
    assert hits == []

@pass_alpharep
def test_glob_single_char(self, alpharep):
    """
    ``?`` and ``[.]`` each match the single ``.`` in ``a.txt``, while
    ``[?]`` only matches a literal question mark.
    """
    root = zipfile.Path(alpharep)
    only_a = [zipfile.Path(alpharep, "a.txt")]

    assert list(root.glob("a?txt")) == only_a
    assert list(root.glob("a[.]txt")) == only_a
    assert list(root.glob("a[?]txt")) == []

@pass_alpharep
def test_glob_chars(self, alpharep):
    """
    Character sets combine with ``?``: the pattern selects ``k.bin``
    and ``l.baz`` under ``j`` but not ``m.bar``.
    """
    root = zipfile.Path(alpharep)
    expected = [zipfile.Path(alpharep, at) for at in ("j/k.bin", "j/l.baz")]

    assert list(root.glob("j/?.b[ai][nz]")) == expected

def test_glob_empty(self):
root = zipfile.Path(zipfile.ZipFile(io.BytesIO(), 'w'))
with self.assertRaises(ValueError):
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_zipfile/_path/write-alpharep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from . import test_path


# When run as a script, extract the alpharep fixture into ./alpharep.
if __name__ == '__main__':
    test_path.build_alpharep_fixture().extractall('alpharep')
31 changes: 13 additions & 18 deletions Lib/zipfile/_path/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import contextlib
import pathlib
import re
import fnmatch

from .glob import translate


__all__ = ['Path']
Expand Down Expand Up @@ -296,21 +297,24 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
return io.TextIOWrapper(stream, encoding, *args, **kwargs)

def _base(self):
    """
    Return a PurePosixPath for this path's location in the archive,
    falling back to the zip file's own filename when ``at`` is empty.
    """
    location = self.at or self.root.filename
    return pathlib.PurePosixPath(location)

@property
def name(self):
return pathlib.Path(self.at).name or self.filename.name
return self._base().name

@property
def suffix(self):
return pathlib.Path(self.at).suffix or self.filename.suffix
return self._base().suffix

@property
def suffixes(self):
return pathlib.Path(self.at).suffixes or self.filename.suffixes
return self._base().suffixes

@property
def stem(self):
return pathlib.Path(self.at).stem or self.filename.stem
return self._base().stem

@property
def filename(self):
Expand Down Expand Up @@ -347,30 +351,21 @@ def iterdir(self):
return filter(self._is_child, subs)

def match(self, path_pattern):
return pathlib.Path(self.at).match(path_pattern)
return pathlib.PurePosixPath(self.at).match(path_pattern)

def is_symlink(self):
    """
    Report whether this path is a symbolic link.

    Symlinks are not detected, so the answer is always ``False``
    (python/cpython#82102).
    """
    return False

def _descendants(self):
for child in self.iterdir():
yield child
if child.is_dir():
yield from child._descendants()

def glob(self, pattern):
if not pattern:
raise ValueError(f"Unacceptable pattern: {pattern!r}")

matches = re.compile(fnmatch.translate(pattern)).fullmatch
return (
child
for child in self._descendants()
if matches(str(child.relative_to(self)))
)
prefix = re.escape(self.at)
matches = re.compile(prefix + translate(pattern)).fullmatch
return map(self._next, filter(matches, self.root.namelist()))

def rglob(self, pattern):
    """
    Glob recursively by delegating to ``glob`` with ``pattern``
    prefixed by ``**/``.
    """
    recursive_pattern = f'**/{pattern}'
    return self.glob(recursive_pattern)
Expand Down
40 changes: 40 additions & 0 deletions Lib/zipfile/_path/glob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import re


def translate(pattern):
    r"""
    Given a glob pattern, produce a regex that matches it.

    ``*`` and ``?`` never match the ``/`` separator; ``**`` may span
    separators. Character sets (``[...]``) are passed through as-is.

    >>> translate('*.txt')
    '[^/]*\\.txt'
    >>> translate('a?txt')
    'a[^/]txt'
    >>> translate('**/*')
    '.*/[^/]*'
    """
    return ''.join(map(replace, separate(pattern)))


def separate(pattern):
    """
    Separate out character sets to avoid translating their contents.

    Returns an iterator of match objects. A complete ``[...]`` set is
    captured in the named group ``set``; all other text, including an
    unterminated ``[`` running to the end of the pattern, is not.

    >>> [m.group(0) for m in separate('*.txt')]
    ['*.txt']
    >>> [m.group(0) for m in separate('a[?]txt')]
    ['a', '[?]', 'txt']
    """
    return re.finditer(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)', pattern)


def replace(match):
    """
    Perform the replacements for a match from :func:`separate`.

    A character set is returned unchanged. Any other text is escaped
    and its wildcards are then rewritten as regex fragments.
    """
    return match.group('set') or (
        re.escape(match.group(0))
        # '**' must be rewritten before '*', or it would be consumed as
        # two single-star wildcards.
        .replace('\\*\\*', r'.*')
        .replace('\\*', r'[^/]*')
        # Like '*', a '?' matches within one path segment only, so it
        # must not match the separator.
        .replace('\\?', r'[^/]')
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fixed several bugs in zipfile.Path, including: in ``Path.match``, Windows
separators are no longer honored (and never were meant to be); Fixed
``name``/``suffix``/``suffixes``/``stem`` operations when no filename is
present and the Path is not at the root of the zipfile; Reworked glob for
performance and more correct matching behavior.