Skip to content

Commit d247c1e

Browse files
committed
importlib: Read distribution name/version from metadata directory name, if possible
importlib does not cache metadata in-memory, so querying even simple attributes like distribution names and versions can quickly become expensive (as each access requires reading METADATA). Fortunately, `Distribution.canonical_name` is already optimized to parse the metadata directory name to obtain the name when possible. This commit extends that optimization to the finder implementation and to the version attribute.

.egg-info directory names tend not to include the version, so they are not considered when optimizing the version lookup.

simplewheel-2.0-1-py2.py3-none-any.whl had to be modified to rename its .dist-info directory, which mistakenly included the wheel build tag (in violation of the wheel specification). Wheel contents with the offending directory name:

    simplewheel/__init__.py
    simplewheel-2.0-1.dist-info/DESCRIPTION.rst
    simplewheel-2.0-1.dist-info/metadata.json
    simplewheel-2.0-1.dist-info/top_level.txt
    simplewheel-2.0-1.dist-info/WHEEL
    simplewheel-2.0-1.dist-info/METADATA
    simplewheel-2.0-1.dist-info/RECORD

Otherwise, the build tag was mistaken for part of the version and led pip to think the wheel was a post-release, breaking tests...
1 parent 5545a15 commit d247c1e

File tree

8 files changed

+50
-27
lines changed

8 files changed

+50
-27
lines changed

news/12656.feature.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Improve discovery performance of installed packages when the
2+
``importlib.metadata`` backend is used to load distribution metadata
3+
(used by default under Python 3.11+).

news/aa82171b-1578-4128-8db3-9aa72b3a6a84.trivial.rst

Whitespace-only changes.

src/pip/_internal/metadata/importlib/_compat.py

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import importlib.metadata
2-
from typing import Any, Optional, Protocol, cast
2+
import os
3+
from typing import Any, Optional, Protocol, Tuple, cast
4+
5+
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
36

47

58
class BadMetadata(ValueError):
@@ -43,13 +46,39 @@ def get_info_location(d: importlib.metadata.Distribution) -> Optional[BasePath]:
4346
return getattr(d, "_path", None)
4447

4548

46-
def get_dist_name(dist: importlib.metadata.Distribution) -> str:
47-
"""Get the distribution's project name.
49+
def parse_name_and_version_from_info_directory(
    dist: importlib.metadata.Distribution,
) -> Tuple[Optional[str], Optional[str]]:
    """Derive a project name and version from the metadata directory name.

    Only the final path component of the distribution's info location is
    inspected, so no metadata file is read; this is much faster than reading
    distribution metadata.

    Returns a ``(name, version)`` tuple:

    * ``<name>-<version>.dist-info`` (exactly one hyphen in the stem) yields
      both parts, as raw, un-normalized strings.
    * ``*.egg-info`` yields the text before the first hyphen as the name and
      ``None`` as the version.
    * Anything else, including a distribution with no info location, yields
      ``(None, None)`` so that callers can fall back to the real metadata.
    """
    location = get_info_location(dist)
    if location is None:
        return None, None

    stem, extension = os.path.splitext(location.name)

    if extension == ".dist-info":
        # Only trust the unambiguous "<name>-<version>" shape. With any other
        # number of hyphens we cannot tell which piece is the version, so the
        # directory name is not parsed at all.
        pieces = stem.split("-")
        if len(pieces) == 2:
            return pieces[0], pieces[1]
        return None, None

    if extension == ".egg-info":
        # Only the name is taken from an .egg-info entry; the version is left
        # for the caller to look up elsewhere.
        return stem.partition("-")[0], None

    return None, None
70+
71+
72+
def get_dist_canonical_name(dist: importlib.metadata.Distribution) -> NormalizedName:
73+
"""Get the distribution's normalized name.
4874
4975
The ``name`` attribute is only available in Python 3.10 or later. We are
5076
targeting exactly that, but Mypy does not know this.
5177
"""
78+
if name := parse_name_and_version_from_info_directory(dist)[0]:
79+
return canonicalize_name(name)
80+
5281
name = cast(Any, dist).name
5382
if not isinstance(name, str):
5483
raise BadMetadata(dist, reason="invalid metadata entry 'name'")
55-
return name
84+
return canonicalize_name(name)

src/pip/_internal/metadata/importlib/_dists.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import email.message
22
import importlib.metadata
3-
import os
43
import pathlib
54
import zipfile
65
from typing import (
@@ -30,7 +29,11 @@
3029
from pip._internal.utils.temp_dir import TempDirectory
3130
from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file
3231

33-
from ._compat import BasePath, get_dist_name
32+
from ._compat import (
33+
BasePath,
34+
get_dist_canonical_name,
35+
parse_name_and_version_from_info_directory,
36+
)
3437

3538

3639
class WheelDistribution(importlib.metadata.Distribution):
@@ -153,25 +156,14 @@ def installed_location(self) -> Optional[str]:
153156
return None
154157
return normalize_path(str(self._installed_location))
155158

156-
def _get_dist_name_from_location(self) -> Optional[str]:
157-
"""Try to get the name from the metadata directory name.
158-
159-
This is much faster than reading metadata.
160-
"""
161-
if self._info_location is None:
162-
return None
163-
stem, suffix = os.path.splitext(self._info_location.name)
164-
if suffix not in (".dist-info", ".egg-info"):
165-
return None
166-
return stem.split("-", 1)[0]
167-
168159
@property
169160
def canonical_name(self) -> NormalizedName:
170-
name = self._get_dist_name_from_location() or get_dist_name(self._dist)
171-
return canonicalize_name(name)
161+
return get_dist_canonical_name(self._dist)
172162

173163
@property
174164
def version(self) -> Version:
165+
if version := parse_name_and_version_from_info_directory(self._dist)[1]:
166+
return parse_version(version)
175167
return parse_version(self._dist.version)
176168

177169
@property

src/pip/_internal/metadata/importlib/_envs.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from pip._internal.utils.deprecation import deprecated
1616
from pip._internal.utils.filetypes import WHEEL_EXTENSION
1717

18-
from ._compat import BadMetadata, BasePath, get_dist_name, get_info_location
18+
from ._compat import BadMetadata, BasePath, get_dist_canonical_name, get_info_location
1919
from ._dists import Distribution
2020

2121
logger = logging.getLogger(__name__)
@@ -61,14 +61,13 @@ def _find_impl(self, location: str) -> Iterator[FoundResult]:
6161
for dist in importlib.metadata.distributions(path=[location]):
6262
info_location = get_info_location(dist)
6363
try:
64-
raw_name = get_dist_name(dist)
64+
name = get_dist_canonical_name(dist)
6565
except BadMetadata as e:
6666
logger.warning("Skipping %s due to %s", info_location, e.reason)
6767
continue
68-
normalized_name = canonicalize_name(raw_name)
69-
if normalized_name in self._found_names:
68+
if name in self._found_names:
7069
continue
71-
self._found_names.add(normalized_name)
70+
self._found_names.add(name)
7271
yield dist, info_location
7372

7473
def find(self, location: str) -> Iterator[BaseDistribution]:
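Binary file not shown. (The file header is missing here; presumably this is the modified simplewheel-2.0-1-py2.py3-none-any.whl test wheel described in the commit message.)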

tests/functional/test_install.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1172,7 +1172,7 @@ def test_install_nonlocal_compatible_wheel(
11721172
)
11731173
assert result.returncode == SUCCESS
11741174

1175-
distinfo = Path("scratch") / "target" / "simplewheel-2.0-1.dist-info"
1175+
distinfo = Path("scratch") / "target" / "simplewheel-2.0.dist-info"
11761176
result.did_create(distinfo)
11771177

11781178
# Test install without --target

tests/functional/test_install_report.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def test_install_report_basic(
3939
assert url.endswith("/packages/simplewheel-2.0-1-py2.py3-none-any.whl")
4040
assert (
4141
simplewheel_report["download_info"]["archive_info"]["hash"]
42-
== "sha256=191d6520d0570b13580bf7642c97ddfbb46dd04da5dd2cf7bef9f32391dfe716"
42+
== "sha256=71e1ca6b16ae3382a698c284013f66504f2581099b2ce4801f60e9536236ceee"
4343
)
4444

4545

0 commit comments

Comments
 (0)