Skip to content

Commit 22980dc

Browse files
authored
gh-106752: Sync with zipp 3.16.2 (#106757)
* gh-106752: Sync with zipp 3.16.2 * Add blurb
1 parent 2566b74 commit 22980dc

File tree

6 files changed

+204
-27
lines changed

6 files changed

+204
-27
lines changed

Lib/test/test_zipfile/_path/test_complexity.py

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1-
import unittest
1+
import io
2+
import itertools
3+
import math
4+
import re
25
import string
6+
import unittest
37
import zipfile
48

59
from ._functools import compose
@@ -9,9 +13,11 @@
913

1014

1115
big_o = import_or_skip('big_o')
16+
pytest = import_or_skip('pytest')
1217

1318

1419
class TestComplexity(unittest.TestCase):
20+
@pytest.mark.flaky
1521
def test_implied_dirs_performance(self):
1622
best, others = big_o.big_o(
1723
compose(consume, zipfile.CompleteDirs._implied_dirs),
@@ -22,3 +28,76 @@ def test_implied_dirs_performance(self):
2228
min_n=1,
2329
)
2430
assert best <= big_o.complexities.Linear
31+
32+
def make_zip_path(self, depth=1, width=1) -> zipfile.Path:
    """
    Build an in-memory zip holding ``width`` empty ``.txt`` files under
    each of the ``depth`` directory prefixes, and return it as a Path.
    """
    archive = zipfile.ZipFile(io.BytesIO(), mode='w')
    # make_names returns a one-shot iterator; materialize it so it can be
    # replayed for every directory prefix.
    file_names = tuple(self.make_names(width))
    for directory in self.make_deep_paths(depth):
        for file_name in file_names:
            archive.writestr(f"{directory}{file_name}.txt", b'')
    archive.filename = "big un.zip"
    return zipfile.Path(archive)
42+
43+
@classmethod
44+
def make_names(cls, width, letters=string.ascii_lowercase):
    """
    Lazily produce ``width`` distinct names composed from ``letters``.

    All names have the same length (the number of ``letters`` factors
    derived from ``log(width, len(letters))``) and are emitted in the
    order generated by ``itertools.product``.

    >>> list(TestComplexity.make_names(1))
    ['a']
    >>> list(TestComplexity.make_names(2))
    ['a', 'b']
    >>> list(TestComplexity.make_names(30))
    ['aa', 'ab', ..., 'bd']
    """
    # determine how many products are needed to produce width; require at
    # least one, because log(1) == 0 would select the empty product, whose
    # single element joins to the empty name ''.
    n_products = max(1, math.ceil(math.log(width, len(letters))))
    inputs = (letters,) * n_products
    combinations = itertools.product(*inputs)
    names = map(''.join, combinations)
    return itertools.islice(names, width)
57+
58+
@classmethod
59+
def make_deep_paths(cls, depth):
    """
    Lazily yield the directory prefixes for levels 0 through ``depth - 1``,
    each built by ``make_deep_path``.
    """
    return (cls.make_deep_path(level) for level in range(depth))
61+
62+
@classmethod
63+
def make_deep_path(cls, depth):
    """
    Return a directory prefix consisting of ``depth`` repetitions of 'd/'.
    """
    return 'd/' * depth
65+
66+
def test_baseline_regex_complexity(self):
    """
    Check that big_o rates a single regex fullmatch against the strings
    built by ``make_deep_path`` (min_n=1, max_n=100) as no worse than
    Constant.
    """

    # NOTE(review): inside this raw string, ``\\.`` is an escaped backslash
    # followed by "any character", not an escaped dot -- confirm intended.
    def match_txt(path):
        return re.fullmatch(r'[^/]*\\.txt', path)

    fitted, _others = big_o.big_o(
        match_txt,
        self.make_deep_path,
        max_n=100,
        min_n=1,
    )
    assert fitted <= big_o.complexities.Constant
74+
75+
@pytest.mark.flaky
76+
def test_glob_depth(self):
    """
    Check that big_o rates ``glob('*.txt')`` as no worse than Quadratic
    when the size is passed to ``make_zip_path`` as its depth.
    """

    def glob_txt(path):
        return consume(path.glob('*.txt'))

    fitted, _others = big_o.big_o(
        glob_txt,
        # the size is passed positionally, so it becomes ``depth``
        self.make_zip_path,
        max_n=100,
        min_n=1,
    )
    assert fitted <= big_o.complexities.Quadratic
84+
85+
@pytest.mark.flaky
86+
def test_glob_width(self):
    """
    Check that big_o rates ``glob('*.txt')`` as no worse than Linear
    when the size is passed to ``make_zip_path`` as its width.
    """

    def glob_txt(path):
        return consume(path.glob('*.txt'))

    def wide_zip(size):
        return self.make_zip_path(width=size)

    fitted, _others = big_o.big_o(
        glob_txt,
        wide_zip,
        max_n=100,
        min_n=1,
    )
    assert fitted <= big_o.complexities.Linear
94+
95+
@pytest.mark.flaky
96+
def test_glob_width_and_depth(self):
    """
    Check that big_o rates ``glob('*.txt')`` as no worse than Linear
    when the size is used for both the depth and the width of the zip
    built by ``make_zip_path``.
    """

    def glob_txt(path):
        return consume(path.glob('*.txt'))

    def square_zip(size):
        return self.make_zip_path(depth=size, width=size)

    fitted, _others = big_o.big_o(
        glob_txt,
        square_zip,
        max_n=10,
        min_n=1,
    )
    assert fitted <= big_o.complexities.Linear

Lib/test/test_zipfile/_path/test_path.py

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,21 @@ def build_alpharep_fixture():
4141
│ ├── d
4242
│ │ └── e.txt
4343
│ └── f.txt
44-
└── g
45-
└── h
46-
└── i.txt
44+
├── g
45+
│ └── h
46+
│ └── i.txt
47+
└── j
48+
├── k.bin
49+
├── l.baz
50+
└── m.bar
4751
4852
This fixture has the following key characteristics:
4953
5054
- a file at the root (a)
5155
- a file two levels deep (b/d/e)
5256
- multiple files in a directory (b/c, b/f)
5357
- a directory containing only a directory (g/h)
58+
- a directory with files of different extensions (j/klm)
5459
5560
"alpha" because it uses alphabet
5661
"rep" because it's a representative example
@@ -62,6 +67,9 @@ def build_alpharep_fixture():
6267
zf.writestr("b/d/e.txt", b"content of e")
6368
zf.writestr("b/f.txt", b"content of f")
6469
zf.writestr("g/h/i.txt", b"content of i")
70+
zf.writestr("j/k.bin", b"content of k")
71+
zf.writestr("j/l.baz", b"content of l")
72+
zf.writestr("j/m.bar", b"content of m")
6573
zf.filename = "alpharep.zip"
6674
return zf
6775

@@ -92,7 +100,7 @@ def zipfile_ondisk(self, alpharep):
92100
def test_iterdir_and_types(self, alpharep):
93101
root = zipfile.Path(alpharep)
94102
assert root.is_dir()
95-
a, b, g = root.iterdir()
103+
a, b, g, j = root.iterdir()
96104
assert a.is_file()
97105
assert b.is_dir()
98106
assert g.is_dir()
@@ -112,7 +120,7 @@ def test_is_file_missing(self, alpharep):
112120
@pass_alpharep
113121
def test_iterdir_on_file(self, alpharep):
114122
root = zipfile.Path(alpharep)
115-
a, b, g = root.iterdir()
123+
a, b, g, j = root.iterdir()
116124
with self.assertRaises(ValueError):
117125
a.iterdir()
118126

@@ -127,7 +135,7 @@ def test_subdir_is_dir(self, alpharep):
127135
@pass_alpharep
128136
def test_open(self, alpharep):
129137
root = zipfile.Path(alpharep)
130-
a, b, g = root.iterdir()
138+
a, b, g, j = root.iterdir()
131139
with a.open(encoding="utf-8") as strm:
132140
data = strm.read()
133141
self.assertEqual(data, "content of a")
@@ -229,7 +237,7 @@ def test_open_missing_directory(self):
229237
@pass_alpharep
230238
def test_read(self, alpharep):
231239
root = zipfile.Path(alpharep)
232-
a, b, g = root.iterdir()
240+
a, b, g, j = root.iterdir()
233241
assert a.read_text(encoding="utf-8") == "content of a"
234242
# Also check positional encoding arg (gh-101144).
235243
assert a.read_text("utf-8") == "content of a"
@@ -295,7 +303,7 @@ def test_mutability(self, alpharep):
295303
reflect that change.
296304
"""
297305
root = zipfile.Path(alpharep)
298-
a, b, g = root.iterdir()
306+
a, b, g, j = root.iterdir()
299307
alpharep.writestr('foo.txt', 'foo')
300308
alpharep.writestr('bar/baz.txt', 'baz')
301309
assert any(child.name == 'foo.txt' for child in root.iterdir())
@@ -394,6 +402,13 @@ def test_suffixes(self, alpharep):
394402
e = root / '.hgrc'
395403
assert e.suffixes == []
396404

405+
@pass_alpharep
406+
def test_suffix_no_filename(self, alpharep):
407+
alpharep.filename = None
408+
root = zipfile.Path(alpharep)
409+
assert root.joinpath('example').suffix == ""
410+
assert root.joinpath('example').suffixes == []
411+
397412
@pass_alpharep
398413
def test_stem(self, alpharep):
399414
"""
@@ -411,6 +426,8 @@ def test_stem(self, alpharep):
411426
d = root / "d"
412427
assert d.stem == "d"
413428

429+
assert (root / ".gitignore").stem == ".gitignore"
430+
414431
@pass_alpharep
415432
def test_root_parent(self, alpharep):
416433
root = zipfile.Path(alpharep)
@@ -442,12 +459,49 @@ def test_match_and_glob(self, alpharep):
442459
assert not root.match("*.txt")
443460

444461
assert list(root.glob("b/c.*")) == [zipfile.Path(alpharep, "b/c.txt")]
462+
assert list(root.glob("b/*.txt")) == [
463+
zipfile.Path(alpharep, "b/c.txt"),
464+
zipfile.Path(alpharep, "b/f.txt"),
465+
]
445466

467+
@pass_alpharep
468+
def test_glob_recursive(self, alpharep):
469+
root = zipfile.Path(alpharep)
446470
files = root.glob("**/*.txt")
447471
assert all(each.match("*.txt") for each in files)
448472

449473
assert list(root.glob("**/*.txt")) == list(root.rglob("*.txt"))
450474

475+
@pass_alpharep
476+
def test_glob_subdirs(self, alpharep):
477+
root = zipfile.Path(alpharep)
478+
479+
assert list(root.glob("*/i.txt")) == []
480+
assert list(root.rglob("*/i.txt")) == [zipfile.Path(alpharep, "g/h/i.txt")]
481+
482+
@pass_alpharep
483+
def test_glob_does_not_overmatch_dot(self, alpharep):
484+
root = zipfile.Path(alpharep)
485+
486+
assert list(root.glob("*.xt")) == []
487+
488+
@pass_alpharep
489+
def test_glob_single_char(self, alpharep):
490+
root = zipfile.Path(alpharep)
491+
492+
assert list(root.glob("a?txt")) == [zipfile.Path(alpharep, "a.txt")]
493+
assert list(root.glob("a[.]txt")) == [zipfile.Path(alpharep, "a.txt")]
494+
assert list(root.glob("a[?]txt")) == []
495+
496+
@pass_alpharep
497+
def test_glob_chars(self, alpharep):
498+
root = zipfile.Path(alpharep)
499+
500+
assert list(root.glob("j/?.b[ai][nz]")) == [
501+
zipfile.Path(alpharep, "j/k.bin"),
502+
zipfile.Path(alpharep, "j/l.baz"),
503+
]
504+
451505
def test_glob_empty(self):
452506
root = zipfile.Path(zipfile.ZipFile(io.BytesIO(), 'w'))
453507
with self.assertRaises(ValueError):
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from . import test_path
2+
3+
4+
if __name__ == '__main__':
    # When run as a script, extract the alpharep fixture into 'alpharep'.
    test_path.build_alpharep_fixture().extractall('alpharep')

Lib/zipfile/_path/__init__.py

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
import contextlib
66
import pathlib
77
import re
8-
import fnmatch
8+
9+
from .glob import translate
910

1011

1112
__all__ = ['Path']
@@ -296,21 +297,24 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
296297
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
297298
return io.TextIOWrapper(stream, encoding, *args, **kwargs)
298299

300+
def _base(self):
301+
return pathlib.PurePosixPath(self.at or self.root.filename)
302+
299303
@property
300304
def name(self):
301-
return pathlib.Path(self.at).name or self.filename.name
305+
return self._base().name
302306

303307
@property
304308
def suffix(self):
305-
return pathlib.Path(self.at).suffix or self.filename.suffix
309+
return self._base().suffix
306310

307311
@property
308312
def suffixes(self):
309-
return pathlib.Path(self.at).suffixes or self.filename.suffixes
313+
return self._base().suffixes
310314

311315
@property
312316
def stem(self):
313-
return pathlib.Path(self.at).stem or self.filename.stem
317+
return self._base().stem
314318

315319
@property
316320
def filename(self):
@@ -347,30 +351,21 @@ def iterdir(self):
347351
return filter(self._is_child, subs)
348352

349353
def match(self, path_pattern):
350-
return pathlib.Path(self.at).match(path_pattern)
354+
return pathlib.PurePosixPath(self.at).match(path_pattern)
351355

352356
def is_symlink(self):
353357
"""
354358
Return whether this path is a symlink. Always false (python/cpython#82102).
355359
"""
356360
return False
357361

358-
def _descendants(self):
359-
for child in self.iterdir():
360-
yield child
361-
if child.is_dir():
362-
yield from child._descendants()
363-
364362
def glob(self, pattern):
365363
if not pattern:
366364
raise ValueError(f"Unacceptable pattern: {pattern!r}")
367365

368-
matches = re.compile(fnmatch.translate(pattern)).fullmatch
369-
return (
370-
child
371-
for child in self._descendants()
372-
if matches(str(child.relative_to(self)))
373-
)
366+
prefix = re.escape(self.at)
367+
matches = re.compile(prefix + translate(pattern)).fullmatch
368+
return map(self._next, filter(matches, self.root.namelist()))
374369

375370
def rglob(self, pattern):
376371
return self.glob(f'**/{pattern}')

Lib/zipfile/_path/glob.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import re
2+
3+
4+
def translate(pattern):
    r"""
    Convert a glob pattern into the source of a regex that matches it.

    Every piece yielded by :func:`separate` is passed through
    :func:`replace` and the results are concatenated.

    >>> translate('*.txt')
    '[^/]*\\.txt'
    >>> translate('a?txt')
    'a.txt'
    >>> translate('**/*')
    '.*/[^/]*'
    """
    return ''.join(replace(piece) for piece in separate(pattern))
16+
17+
18+
def separate(pattern):
    """
    Split a glob pattern into match objects so that bracketed character
    sets (captured in the ``set`` group) stay apart from the other text
    and can be left untranslated.

    >>> [m.group(0) for m in separate('*.txt')]
    ['*.txt']
    >>> [m.group(0) for m in separate('a[?]txt')]
    ['a', '[?]', 'txt']
    """
    # Alternatives, in order: a run without '[', a closed '[...]' set,
    # or an unterminated '[' running to the end of the pattern.
    splitter = re.compile(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)')
    return splitter.finditer(pattern)
28+
29+
30+
def replace(match):
    """
    Turn one match from :func:`separate` into regex source.

    A character set (the ``set`` group) is returned unchanged. Any other
    text is regex-escaped, after which the escaped glob wildcards are
    swapped for their regex equivalents.
    """
    char_set = match.group('set')
    if char_set:
        return char_set

    translated = re.escape(match.group(0))
    # Order matters: '**' must be rewritten before the single '*'.
    substitutions = (
        ('\\*\\*', r'.*'),
        ('\\*', r'[^/]*'),
        ('\\?', r'.'),
    )
    for escaped_glob, regex in substitutions:
        translated = translated.replace(escaped_glob, regex)
    return translated
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fixed several bugs in zipfile.Path, including: in ``Path.match``, Windows
2+
separators are no longer honored (and never were meant to be); Fixed
3+
``name``/``suffix``/``suffixes``/``stem`` operations when no filename is
4+
present and the Path is not at the root of the zipfile; Reworked glob for
5+
performance and more correct matching behavior.

0 commit comments

Comments
 (0)