From 2e365bdab1a37c296eae264991c2e88d57f73a67 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Tue, 1 Aug 2023 22:11:07 -0400
Subject: [PATCH 1/6] move test_download_metadata mock pypi index utilities to
conftest.py
---
tests/conftest.py | 242 ++++++++++++++++++++++++
tests/functional/test_download.py | 303 ++++--------------------------
2 files changed, 282 insertions(+), 263 deletions(-)
diff --git a/tests/conftest.py b/tests/conftest.py
index a183cadf2e9..f481e06c8d6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,22 +1,32 @@
import compileall
import fnmatch
+import http.server
import io
import os
import re
import shutil
import subprocess
import sys
+import threading
from contextlib import ExitStack, contextmanager
+from dataclasses import dataclass
+from enum import Enum
+from hashlib import sha256
from pathlib import Path
+from textwrap import dedent
from typing import (
TYPE_CHECKING,
+ Any,
AnyStr,
Callable,
+ ClassVar,
Dict,
Iterable,
Iterator,
List,
Optional,
+ Set,
+ Tuple,
Union,
)
from unittest.mock import patch
@@ -750,3 +760,235 @@ def proxy(request: pytest.FixtureRequest) -> str:
@pytest.fixture
def enable_user_site(virtualenv: VirtualEnvironment) -> None:
virtualenv.user_site_packages = True
+
+
+class MetadataKind(Enum):
+ """All the types of values we might be provided for the data-dist-info-metadata
+ attribute from PEP 658."""
+
+ # Valid: will read metadata from the dist instead.
+ No = "none"
+ # Valid: will read the .metadata file, but won't check its hash.
+ Unhashed = "unhashed"
+ # Valid: will read the .metadata file and check its hash matches.
+ Sha256 = "sha256"
+ # Invalid: will error out after checking the hash.
+ WrongHash = "wrong-hash"
+ # Invalid: will error out after failing to fetch the .metadata file.
+ NoFile = "no-file"
+
+
+@dataclass(frozen=True)
+class FakePackage:
+ """Mock package structure used to generate a PyPI repository.
+
+ FakePackage name and version should correspond to sdists (.tar.gz files) in our test
+ data."""
+
+ name: str
+ version: str
+ filename: str
+ metadata: MetadataKind
+ # This will override any dependencies specified in the actual dist's METADATA.
+ requires_dist: Tuple[str, ...] = ()
+ # This will override the Name specified in the actual dist's METADATA.
+ metadata_name: Optional[str] = None
+
+ def metadata_filename(self) -> str:
+ """This is specified by PEP 658."""
+ return f"{self.filename}.metadata"
+
+ def generate_additional_tag(self) -> str:
+ """This gets injected into the tag in the generated PyPI index page for this
+ package."""
+ if self.metadata == MetadataKind.No:
+ return ""
+ if self.metadata in [MetadataKind.Unhashed, MetadataKind.NoFile]:
+ return 'data-dist-info-metadata="true"'
+ if self.metadata == MetadataKind.WrongHash:
+ return 'data-dist-info-metadata="sha256=WRONG-HASH"'
+ assert self.metadata == MetadataKind.Sha256
+ checksum = sha256(self.generate_metadata()).hexdigest()
+ return f'data-dist-info-metadata="sha256={checksum}"'
+
+ def requires_str(self) -> str:
+ if not self.requires_dist:
+ return ""
+ joined = " and ".join(self.requires_dist)
+ return f"Requires-Dist: {joined}"
+
+ def generate_metadata(self) -> bytes:
+ """This is written to `self.metadata_filename()` and will override the actual
+ dist's METADATA, unless `self.metadata == MetadataKind.NoFile`."""
+ return dedent(
+ f"""\
+ Metadata-Version: 2.1
+ Name: {self.metadata_name or self.name}
+ Version: {self.version}
+ {self.requires_str()}
+ """
+ ).encode("utf-8")
+
+
+@pytest.fixture(scope="session")
+def fake_packages() -> Dict[str, List[FakePackage]]:
+ """The package database we generate for testing PEP 658 support."""
+ return {
+ "simple": [
+ FakePackage("simple", "1.0", "simple-1.0.tar.gz", MetadataKind.Sha256),
+ FakePackage("simple", "2.0", "simple-2.0.tar.gz", MetadataKind.No),
+ # This will raise a hashing error.
+ FakePackage("simple", "3.0", "simple-3.0.tar.gz", MetadataKind.WrongHash),
+ ],
+ "simple2": [
+ # Override the dependencies here in order to force pip to download
+ # simple-1.0.tar.gz as well.
+ FakePackage(
+ "simple2",
+ "1.0",
+ "simple2-1.0.tar.gz",
+ MetadataKind.Unhashed,
+ ("simple==1.0",),
+ ),
+ # This will raise an error when pip attempts to fetch the metadata file.
+ FakePackage("simple2", "2.0", "simple2-2.0.tar.gz", MetadataKind.NoFile),
+ # This has a METADATA file with a mismatched name.
+ FakePackage(
+ "simple2",
+ "3.0",
+ "simple2-3.0.tar.gz",
+ MetadataKind.Sha256,
+ metadata_name="not-simple2",
+ ),
+ ],
+ "colander": [
+ # Ensure we can read the dependencies from a metadata file within a wheel
+ # *without* PEP 658 metadata.
+ FakePackage(
+ "colander",
+ "0.9.9",
+ "colander-0.9.9-py2.py3-none-any.whl",
+ MetadataKind.No,
+ ),
+ ],
+ "compilewheel": [
+ # Ensure we can override the dependencies of a wheel file by injecting PEP
+ # 658 metadata.
+ FakePackage(
+ "compilewheel",
+ "1.0",
+ "compilewheel-1.0-py2.py3-none-any.whl",
+ MetadataKind.Unhashed,
+ ("simple==1.0",),
+ ),
+ ],
+ "has-script": [
+ # Ensure we check PEP 658 metadata hashing errors for wheel files.
+ FakePackage(
+ "has-script",
+ "1.0",
+ "has.script-1.0-py2.py3-none-any.whl",
+ MetadataKind.WrongHash,
+ ),
+ ],
+ "translationstring": [
+ FakePackage(
+ "translationstring",
+ "1.1",
+ "translationstring-1.1.tar.gz",
+ MetadataKind.No,
+ ),
+ ],
+ "priority": [
+ # Ensure we check for a missing metadata file for wheels.
+ FakePackage(
+ "priority",
+ "1.0",
+ "priority-1.0-py2.py3-none-any.whl",
+ MetadataKind.NoFile,
+ ),
+ ],
+ "requires-simple-extra": [
+ # Metadata name is not canonicalized.
+ FakePackage(
+ "requires-simple-extra",
+ "0.1",
+ "requires_simple_extra-0.1-py2.py3-none-any.whl",
+ MetadataKind.Sha256,
+ metadata_name="Requires_Simple.Extra",
+ ),
+ ],
+ }
+
+
+@pytest.fixture(scope="session")
+def html_index_for_packages(
+ shared_data: TestData,
+ fake_packages: Dict[str, List[FakePackage]],
+ tmpdir_factory: pytest.TempPathFactory,
+) -> Path:
+ """Generate a PyPI HTML package index within a local directory pointing to
+ synthetic test data."""
+ html_dir = tmpdir_factory.mktemp("fake_index_html_content")
+
+ # (1) Generate the content for a PyPI index.html.
+ pkg_links = "\n".join(
+ f'    <a href="{pkg}/index.html">{pkg}</a>' for pkg in fake_packages.keys()
+ )
+ index_html = f"""\
+
+
+
+
+ Simple index
+
+
+{pkg_links}
+
+"""
+ # (2) Generate the index.html in a new subdirectory of the temp directory.
+ (html_dir / "index.html").write_text(index_html)
+
+ # (3) Generate subdirectories for individual packages, each with their own
+ # index.html.
+ for pkg, links in fake_packages.items():
+ pkg_subdir = html_dir / pkg
+ pkg_subdir.mkdir()
+
+ download_links: List[str] = []
+ for package_link in links:
+ # (3.1) Generate the <a> tag which pip can crawl pointing to this
+ # specific package version.
+ download_links.append(
+ f'    <a href="{package_link.filename}" {package_link.generate_additional_tag()}>{package_link.filename}</a><br/>'  # noqa: E501
+ )
+ # (3.2) Copy over the corresponding file in `shared_data.packages`.
+ shutil.copy(
+ shared_data.packages / package_link.filename,
+ pkg_subdir / package_link.filename,
+ )
+ # (3.3) Write a metadata file, if applicable.
+ if package_link.metadata != MetadataKind.NoFile:
+ with open(pkg_subdir / package_link.metadata_filename(), "wb") as f:
+ f.write(package_link.generate_metadata())
+
+ # (3.4) After collating all the download links and copying over the files,
+ # write an index.html with the generated download links for each
+ # copied file for this specific package name.
+ download_links_str = "\n".join(download_links)
+ pkg_index_content = f"""\
+
+
+
+
+ Links for {pkg}
+
+
+ Links for {pkg}
+{download_links_str}
+
+"""
+ with open(pkg_subdir / "index.html", "w") as f:
+ f.write(pkg_index_content)
+
+ return html_dir
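
A minimal sketch of what generate_additional_tag() emits for each MetadataKind,
assuming the FakePackage and MetadataKind definitions above are importable from
tests/conftest.py (the digest shown in the comment is a placeholder, not a real
hash):

    from tests.conftest import FakePackage, MetadataKind

    # Hash-checked PEP 658 metadata: the attribute carries the sha256 digest of
    # the generated .metadata file contents.
    pkg = FakePackage("simple", "1.0", "simple-1.0.tar.gz", MetadataKind.Sha256)
    print(pkg.generate_additional_tag())
    # -> data-dist-info-metadata="sha256=<hex digest of generate_metadata()>"

    # No PEP 658 metadata: nothing is injected into the <a> tag, so pip has to
    # download the dist itself to read its METADATA.
    pkg = FakePackage("simple", "2.0", "simple-2.0.tar.gz", MetadataKind.No)
    print(repr(pkg.generate_additional_tag()))
    # -> ''
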
diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py
index 8da185c066e..bedadc7045b 100644
--- a/tests/functional/test_download.py
+++ b/tests/functional/test_download.py
@@ -1,14 +1,11 @@
+import http.server
import os
import re
import shutil
import textwrap
-import uuid
-from dataclasses import dataclass
-from enum import Enum
from hashlib import sha256
from pathlib import Path
-from textwrap import dedent
-from typing import Callable, Dict, List, Optional, Tuple
+from typing import Callable, List, Tuple
import pytest
@@ -1237,181 +1234,47 @@ def test_download_use_pep517_propagation(
assert len(downloads) == 2
-class MetadataKind(Enum):
- """All the types of values we might be provided for the data-dist-info-metadata
- attribute from PEP 658."""
-
- # Valid: will read metadata from the dist instead.
- No = "none"
- # Valid: will read the .metadata file, but won't check its hash.
- Unhashed = "unhashed"
- # Valid: will read the .metadata file and check its hash matches.
- Sha256 = "sha256"
- # Invalid: will error out after checking the hash.
- WrongHash = "wrong-hash"
- # Invalid: will error out after failing to fetch the .metadata file.
- NoFile = "no-file"
-
-
-@dataclass(frozen=True)
-class Package:
- """Mock package structure used to generate a PyPI repository.
-
- Package name and version should correspond to sdists (.tar.gz files) in our test
- data."""
-
- name: str
- version: str
- filename: str
- metadata: MetadataKind
- # This will override any dependencies specified in the actual dist's METADATA.
- requires_dist: Tuple[str, ...] = ()
- # This will override the Name specified in the actual dist's METADATA.
- metadata_name: Optional[str] = None
-
- def metadata_filename(self) -> str:
- """This is specified by PEP 658."""
- return f"{self.filename}.metadata"
-
- def generate_additional_tag(self) -> str:
- """This gets injected into the tag in the generated PyPI index page for this
- package."""
- if self.metadata == MetadataKind.No:
- return ""
- if self.metadata in [MetadataKind.Unhashed, MetadataKind.NoFile]:
- return 'data-dist-info-metadata="true"'
- if self.metadata == MetadataKind.WrongHash:
- return 'data-dist-info-metadata="sha256=WRONG-HASH"'
- assert self.metadata == MetadataKind.Sha256
- checksum = sha256(self.generate_metadata()).hexdigest()
- return f'data-dist-info-metadata="sha256={checksum}"'
-
- def requires_str(self) -> str:
- if not self.requires_dist:
- return ""
- joined = " and ".join(self.requires_dist)
- return f"Requires-Dist: {joined}"
-
- def generate_metadata(self) -> bytes:
- """This is written to `self.metadata_filename()` and will override the actual
- dist's METADATA, unless `self.metadata == MetadataKind.NoFile`."""
- return dedent(
- f"""\
- Metadata-Version: 2.1
- Name: {self.metadata_name or self.name}
- Version: {self.version}
- {self.requires_str()}
- """
- ).encode("utf-8")
-
-
-@pytest.fixture(scope="function")
-def write_index_html_content(tmpdir: Path) -> Callable[[str], Path]:
- """Generate a PyPI package index.html within a temporary local directory."""
- html_dir = tmpdir / "index_html_content"
- html_dir.mkdir()
-
- def generate_index_html_subdir(index_html: str) -> Path:
- """Create a new subdirectory after a UUID and write an index.html."""
- new_subdir = html_dir / uuid.uuid4().hex
- new_subdir.mkdir()
-
- with open(new_subdir / "index.html", "w") as f:
- f.write(index_html)
-
- return new_subdir
-
- return generate_index_html_subdir
-
-
@pytest.fixture(scope="function")
-def html_index_for_packages(
- shared_data: TestData,
- write_index_html_content: Callable[[str], Path],
-) -> Callable[..., Path]:
- """Generate a PyPI HTML package index within a local directory pointing to
- blank data."""
+def download_local_html_index(
+ script: PipTestEnvironment,
+ html_index_for_packages: Path,
+ tmpdir: Path,
+) -> Callable[..., Tuple[TestPipResult, Path]]:
+ """Execute `pip download` against a generated PyPI index."""
+ download_dir = tmpdir / "download_dir"
- def generate_html_index_for_packages(packages: Dict[str, List[Package]]) -> Path:
+ def run_for_generated_index(
+ args: List[str],
+ allow_error: bool = False,
+ ) -> Tuple[TestPipResult, Path]:
"""
- Produce a PyPI directory structure pointing to the specified packages.
+ Produce a PyPI directory structure pointing to the specified packages, then
+ execute `pip download -i ...` pointing to our generated index.
"""
- # (1) Generate the content for a PyPI index.html.
- pkg_links = "\n".join(
- f'    <a href="{pkg}/index.html">{pkg}</a>' for pkg in packages.keys()
- )
- index_html = f"""\
-
-
-
-
- Simple index
-
-
-{pkg_links}
-
-"""
- # (2) Generate the index.html in a new subdirectory of the temp directory.
- index_html_subdir = write_index_html_content(index_html)
-
- # (3) Generate subdirectories for individual packages, each with their own
- # index.html.
- for pkg, links in packages.items():
- pkg_subdir = index_html_subdir / pkg
- pkg_subdir.mkdir()
-
- download_links: List[str] = []
- for package_link in links:
- # (3.1) Generate the <a> tag which pip can crawl pointing to this
- # specific package version.
- download_links.append(
- f'    <a href="{package_link.filename}" {package_link.generate_additional_tag()}>{package_link.filename}</a><br/>'  # noqa: E501
- )
- # (3.2) Copy over the corresponding file in `shared_data.packages`.
- shutil.copy(
- shared_data.packages / package_link.filename,
- pkg_subdir / package_link.filename,
- )
- # (3.3) Write a metadata file, if applicable.
- if package_link.metadata != MetadataKind.NoFile:
- with open(pkg_subdir / package_link.metadata_filename(), "wb") as f:
- f.write(package_link.generate_metadata())
-
- # (3.4) After collating all the download links and copying over the files,
- # write an index.html with the generated download links for each
- # copied file for this specific package name.
- download_links_str = "\n".join(download_links)
- pkg_index_content = f"""\
-
-
-
-
- Links for {pkg}
-
-
- Links for {pkg}
-{download_links_str}
-
-"""
- with open(pkg_subdir / "index.html", "w") as f:
- f.write(pkg_index_content)
-
- return index_html_subdir
-
- return generate_html_index_for_packages
+ pip_args = [
+ "download",
+ "-d",
+ str(download_dir),
+ "-i",
+ path_to_url(str(html_index_for_packages)),
+ *args,
+ ]
+ result = script.pip(*pip_args, allow_error=allow_error)
+ return (result, download_dir)
+
+ return run_for_generated_index
@pytest.fixture(scope="function")
-def download_generated_html_index(
+def download_server_html_index(
script: PipTestEnvironment,
- html_index_for_packages: Callable[[Dict[str, List[Package]]], Path],
tmpdir: Path,
+ html_index_with_onetime_server: http.server.ThreadingHTTPServer,
) -> Callable[..., Tuple[TestPipResult, Path]]:
"""Execute `pip download` against a generated PyPI index."""
download_dir = tmpdir / "download_dir"
def run_for_generated_index(
- packages: Dict[str, List[Package]],
args: List[str],
allow_error: bool = False,
) -> Tuple[TestPipResult, Path]:
@@ -1419,13 +1282,12 @@ def run_for_generated_index(
Produce a PyPI directory structure pointing to the specified packages, then
execute `pip download -i ...` pointing to our generated index.
"""
- index_dir = html_index_for_packages(packages)
pip_args = [
"download",
"-d",
str(download_dir),
"-i",
- path_to_url(str(index_dir)),
+ "http://localhost:8000",
*args,
]
result = script.pip(*pip_args, allow_error=allow_error)
@@ -1434,86 +1296,6 @@ def run_for_generated_index(
return run_for_generated_index
-# The package database we generate for testing PEP 658 support.
-_simple_packages: Dict[str, List[Package]] = {
- "simple": [
- Package("simple", "1.0", "simple-1.0.tar.gz", MetadataKind.Sha256),
- Package("simple", "2.0", "simple-2.0.tar.gz", MetadataKind.No),
- # This will raise a hashing error.
- Package("simple", "3.0", "simple-3.0.tar.gz", MetadataKind.WrongHash),
- ],
- "simple2": [
- # Override the dependencies here in order to force pip to download
- # simple-1.0.tar.gz as well.
- Package(
- "simple2",
- "1.0",
- "simple2-1.0.tar.gz",
- MetadataKind.Unhashed,
- ("simple==1.0",),
- ),
- # This will raise an error when pip attempts to fetch the metadata file.
- Package("simple2", "2.0", "simple2-2.0.tar.gz", MetadataKind.NoFile),
- # This has a METADATA file with a mismatched name.
- Package(
- "simple2",
- "3.0",
- "simple2-3.0.tar.gz",
- MetadataKind.Sha256,
- metadata_name="not-simple2",
- ),
- ],
- "colander": [
- # Ensure we can read the dependencies from a metadata file within a wheel
- # *without* PEP 658 metadata.
- Package(
- "colander", "0.9.9", "colander-0.9.9-py2.py3-none-any.whl", MetadataKind.No
- ),
- ],
- "compilewheel": [
- # Ensure we can override the dependencies of a wheel file by injecting PEP
- # 658 metadata.
- Package(
- "compilewheel",
- "1.0",
- "compilewheel-1.0-py2.py3-none-any.whl",
- MetadataKind.Unhashed,
- ("simple==1.0",),
- ),
- ],
- "has-script": [
- # Ensure we check PEP 658 metadata hashing errors for wheel files.
- Package(
- "has-script",
- "1.0",
- "has.script-1.0-py2.py3-none-any.whl",
- MetadataKind.WrongHash,
- ),
- ],
- "translationstring": [
- Package(
- "translationstring", "1.1", "translationstring-1.1.tar.gz", MetadataKind.No
- ),
- ],
- "priority": [
- # Ensure we check for a missing metadata file for wheels.
- Package(
- "priority", "1.0", "priority-1.0-py2.py3-none-any.whl", MetadataKind.NoFile
- ),
- ],
- "requires-simple-extra": [
- # Metadata name is not canonicalized.
- Package(
- "requires-simple-extra",
- "0.1",
- "requires_simple_extra-0.1-py2.py3-none-any.whl",
- MetadataKind.Sha256,
- metadata_name="Requires_Simple.Extra",
- ),
- ],
-}
-
-
@pytest.mark.parametrize(
"requirement_to_download, expected_outputs",
[
@@ -1530,14 +1312,13 @@ def run_for_generated_index(
],
)
def test_download_metadata(
- download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
+ download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
requirement_to_download: str,
expected_outputs: List[str],
) -> None:
"""Verify that if a data-dist-info-metadata attribute is present, then it is used
instead of the actual dist's METADATA."""
- _, download_dir = download_generated_html_index(
- _simple_packages,
+ _, download_dir = download_local_html_index(
[requirement_to_download],
)
assert sorted(os.listdir(download_dir)) == expected_outputs
@@ -1557,14 +1338,13 @@ def test_download_metadata(
],
)
def test_incorrect_metadata_hash(
- download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
+ download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
requirement_to_download: str,
real_hash: str,
) -> None:
"""Verify that if a hash for data-dist-info-metadata is provided, it must match the
actual hash of the metadata file."""
- result, _ = download_generated_html_index(
- _simple_packages,
+ result, _ = download_local_html_index(
[requirement_to_download],
allow_error=True,
)
@@ -1583,15 +1363,14 @@ def test_incorrect_metadata_hash(
],
)
def test_metadata_not_found(
- download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
+ download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
requirement_to_download: str,
expected_url: str,
) -> None:
"""Verify that if a data-dist-info-metadata attribute is provided, that pip will
fetch the .metadata file at the location specified by PEP 658, and error
if unavailable."""
- result, _ = download_generated_html_index(
- _simple_packages,
+ result, _ = download_local_html_index(
[requirement_to_download],
allow_error=True,
)
@@ -1604,11 +1383,10 @@ def test_metadata_not_found(
def test_produces_error_for_mismatched_package_name_in_metadata(
- download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
+ download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
) -> None:
"""Verify that the package name from the metadata matches the requested package."""
- result, _ = download_generated_html_index(
- _simple_packages,
+ result, _ = download_local_html_index(
["simple2==3.0"],
allow_error=True,
)
@@ -1628,7 +1406,7 @@ def test_produces_error_for_mismatched_package_name_in_metadata(
),
)
def test_canonicalizes_package_name_before_verifying_metadata(
- download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
+ download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
requirement: str,
) -> None:
"""Verify that the package name from the command line and the package's
@@ -1636,8 +1414,7 @@ def test_canonicalizes_package_name_before_verifying_metadata(
Regression test for /~https://github.com/pypa/pip/issues/12038
"""
- result, download_dir = download_generated_html_index(
- _simple_packages,
+ result, download_dir = download_local_html_index(
[requirement],
allow_error=True,
)
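
The relocated fixture is consumed exactly as the updated tests above show; a
hypothetical minimal test using it (the test name and requirement are chosen for
illustration) would read:

    import os
    from pathlib import Path
    from typing import Callable, Tuple

    from tests.lib import TestPipResult

    def test_simple_download(
        download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
    ) -> None:
        # Runs `pip download -d <tmpdir> -i file://<generated index>` internally.
        result, download_dir = download_local_html_index(["simple==1.0"])
        assert "simple-1.0.tar.gz" in os.listdir(download_dir)
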
From 50a2fb4f9fca0427c192608c30f3cf536b4e4ed4 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 2 Aug 2023 00:02:04 -0400
Subject: [PATCH 2/6] add mock server to test that each dist is downloaded
exactly once
---
tests/conftest.py | 48 +++++++++++++++++++++++++++++
tests/functional/test_download.py | 51 +++++++++++++++++++++++++++++++
2 files changed, 99 insertions(+)
diff --git a/tests/conftest.py b/tests/conftest.py
index f481e06c8d6..25581af9a45 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -992,3 +992,51 @@ def html_index_for_packages(
f.write(pkg_index_content)
return html_dir
+
+
+class OneTimeDownloadHandler(http.server.SimpleHTTPRequestHandler):
+ """Serve files from the current directory, but error if a file is downloaded more
+ than once."""
+
+ _seen_paths: ClassVar[Set[str]] = set()
+
+ def do_GET(self) -> None:
+ if self.path in self._seen_paths:
+ self.send_error(
+ http.HTTPStatus.NOT_FOUND,
+ f"File {self.path} not available more than once!",
+ )
+ return
+ super().do_GET()
+ if not (self.path.endswith("/") or self.path.endswith(".metadata")):
+ self._seen_paths.add(self.path)
+
+
+@pytest.fixture(scope="function")
+def html_index_with_onetime_server(
+ html_index_for_packages: Path,
+) -> Iterator[http.server.ThreadingHTTPServer]:
+ """Serve files from a generated pypi index, erroring if a file is downloaded more
+ than once.
+
+ Provide `-i http://localhost:8000` to pip invocations to point them at this server.
+ """
+
+ class InDirectoryServer(http.server.ThreadingHTTPServer):
+ def finish_request(self, request: Any, client_address: Any) -> None:
+ self.RequestHandlerClass(
+ request, client_address, self, directory=str(html_index_for_packages) # type: ignore[call-arg] # noqa: E501
+ )
+
+ class Handler(OneTimeDownloadHandler):
+ _seen_paths: ClassVar[Set[str]] = set()
+
+ with InDirectoryServer(("", 8000), Handler) as httpd:
+ server_thread = threading.Thread(target=httpd.serve_forever)
+ server_thread.start()
+
+ try:
+ yield httpd
+ finally:
+ httpd.shutdown()
+ server_thread.join()
diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py
index bedadc7045b..c204f424b67 100644
--- a/tests/functional/test_download.py
+++ b/tests/functional/test_download.py
@@ -1324,6 +1324,57 @@ def test_download_metadata(
assert sorted(os.listdir(download_dir)) == expected_outputs
+@pytest.mark.parametrize(
+ "requirement_to_download, expected_outputs, doubled_path",
+ [
+ (
+ "simple2==1.0",
+ ["simple-1.0.tar.gz", "simple2-1.0.tar.gz"],
+ "/simple2/simple2-1.0.tar.gz",
+ ),
+ ("simple==2.0", ["simple-2.0.tar.gz"], "/simple/simple-2.0.tar.gz"),
+ (
+ "colander",
+ ["colander-0.9.9-py2.py3-none-any.whl", "translationstring-1.1.tar.gz"],
+ "/colander/colander-0.9.9-py2.py3-none-any.whl",
+ ),
+ (
+ "compilewheel",
+ [
+ "compilewheel-1.0-py2.py3-none-any.whl",
+ "simple-1.0.tar.gz",
+ ],
+ "/compilewheel/compilewheel-1.0-py2.py3-none-any.whl",
+ ),
+ ],
+)
+def test_download_metadata_server(
+ download_server_html_index: Callable[..., Tuple[TestPipResult, Path]],
+ requirement_to_download: str,
+ expected_outputs: List[str],
+ doubled_path: str,
+) -> None:
+ """Verify that if a data-dist-info-metadata attribute is present, then it is used
+ instead of the actual dist's METADATA.
+
+ Additionally, verify that each dist is downloaded exactly once using a mock server.
+
+ This is a regression test for issue /~https://github.com/pypa/pip/issues/11847.
+ """
+ _, download_dir = download_server_html_index(
+ [requirement_to_download, "--no-cache-dir"],
+ )
+ assert sorted(os.listdir(download_dir)) == expected_outputs
+ shutil.rmtree(download_dir)
+ result, _ = download_server_html_index(
+ [requirement_to_download, "--no-cache-dir"],
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ expected_msg = f"File {doubled_path} not available more than once!"
+ assert expected_msg in result.stderr
+
+
@pytest.mark.parametrize(
"requirement_to_download, real_hash",
[
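
The serve-once trick above boils down to a self-contained sketch like the
following (port selection and names are illustrative; the real fixture
additionally scopes the seen-paths set per test and serves from the generated
index directory rather than the current directory):

    import http.server
    import threading
    import urllib.error
    import urllib.request

    class OneTimeHandler(http.server.SimpleHTTPRequestHandler):
        """First GET for a path succeeds; any repeat is refused with a 404."""

        seen: set = set()

        def do_GET(self) -> None:
            if self.path in self.seen:
                self.send_error(404, f"File {self.path} not available more than once!")
                return
            super().do_GET()
            self.seen.add(self.path)

    with http.server.ThreadingHTTPServer(("localhost", 0), OneTimeHandler) as httpd:
        port = httpd.server_address[1]
        threading.Thread(target=httpd.serve_forever, daemon=True).start()
        urllib.request.urlopen(f"http://localhost:{port}/").read()  # first fetch: 200
        try:
            urllib.request.urlopen(f"http://localhost:{port}/")     # second fetch
        except urllib.error.HTTPError as err:
            print(err.code)  # 404: "/" was already served once
        httpd.shutdown()
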
From 22637722aa7c9da0aaf58be82b6bef16d6efd097 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Tue, 1 Aug 2023 20:45:26 -0400
Subject: [PATCH 3/6] fix #11847 for sdists
---
src/pip/_internal/operations/prepare.py | 50 ++++++++++++++++---------
1 file changed, 33 insertions(+), 17 deletions(-)
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index 8402be01bbf..81bf48fbb75 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -7,7 +7,7 @@
import mimetypes
import os
import shutil
-from typing import Dict, Iterable, List, Optional
+from typing import Dict, Iterable, List, Optional, Set
from pip._vendor.packaging.utils import canonicalize_name
@@ -474,6 +474,8 @@ def _complete_partial_requirements(
assert req.link
links_to_fully_download[req.link] = req
+ reqs_with_newly_unpacked_source_dirs: Set[Link] = set()
+
batch_download = self._batch_download(
links_to_fully_download.keys(),
temp_dir,
@@ -481,25 +483,35 @@ def _complete_partial_requirements(
for link, (filepath, _) in batch_download:
logger.debug("Downloading link %s to %s", link, filepath)
req = links_to_fully_download[link]
+ # Record the downloaded file path so wheel reqs can extract a Distribution
+ # in .get_dist().
req.local_file_path = filepath
- # TODO: This needs fixing for sdists
- # This is an emergency fix for #11847, which reports that
- # distributions get downloaded twice when metadata is loaded
- # from a PEP 658 standalone metadata file. Setting _downloaded
- # fixes this for wheels, but breaks the sdist case (tests
- # test_download_metadata). As PyPI is currently only serving
- # metadata for wheels, this is not an immediate issue.
- # Fixing the problem properly looks like it will require a
- # complete refactoring of the `prepare_linked_requirements_more`
- # logic, and I haven't a clue where to start on that, so for now
- # I have fixed the issue *just* for wheels.
- if req.is_wheel:
- self._downloaded[req.link.url] = filepath
+ # Record that the file is downloaded so we don't do it again in
+ # _prepare_linked_requirement().
+ self._downloaded[req.link.url] = filepath
+
+ # If this is an sdist, we need to unpack it and set the .source_dir
+ # immediately after downloading, as _prepare_linked_requirement() assumes
+ # the req is either not downloaded at all, or both downloaded and
+ # unpacked. The downloading and unpacking is typically done with
+ # unpack_url(), but we separate the downloading and unpacking steps here in
+ # order to use the BatchDownloader.
+ if not req.is_wheel:
+ hashes = self._get_linked_req_hashes(req)
+ assert filepath == _check_download_dir(req.link, temp_dir, hashes)
+ self._ensure_link_req_src_dir(req, parallel_builds)
+ unpack_file(filepath, req.source_dir)
+ reqs_with_newly_unpacked_source_dirs.add(req.link)
# This step is necessary to ensure all lazy wheels are processed
# successfully by the 'download', 'wheel', and 'install' commands.
for req in partially_downloaded_reqs:
- self._prepare_linked_requirement(req, parallel_builds)
+ self._prepare_linked_requirement(
+ req,
+ parallel_builds,
+ source_dir_exists_already=req.link
+ in reqs_with_newly_unpacked_source_dirs,
+ )
def prepare_linked_requirement(
self, req: InstallRequirement, parallel_builds: bool = False
@@ -570,7 +582,10 @@ def prepare_linked_requirements_more(
)
def _prepare_linked_requirement(
- self, req: InstallRequirement, parallel_builds: bool
+ self,
+ req: InstallRequirement,
+ parallel_builds: bool,
+ source_dir_exists_already: bool = False,
) -> BaseDistribution:
assert req.link
link = req.link
@@ -602,7 +617,8 @@ def _prepare_linked_requirement(
req.link = req.cached_wheel_source_link
link = req.link
- self._ensure_link_req_src_dir(req, parallel_builds)
+ if not source_dir_exists_already:
+ self._ensure_link_req_src_dir(req, parallel_builds)
if link.is_existing_dir():
local_file = None
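
The invariant this hunk restores: once the BatchDownloader has fetched an sdist,
the requirement must be recorded as downloaded *and* unpacked before
_prepare_linked_requirement() runs, since that method assumes there is no
in-between state. A stdlib-only sketch of the same idea (prepare_sdist_once and
its arguments are hypothetical stand-ins for pip's internal orchestration):

    import tarfile
    from pathlib import Path
    from typing import Dict

    def prepare_sdist_once(
        archive: Path, source_dir: Path, downloaded: Dict[str, Path]
    ) -> None:
        """Record a freshly downloaded sdist and unpack it immediately, so later
        preparation sees it as 'downloaded and unpacked' and never re-fetches it."""
        # Without this record, preparation re-downloads the archive (pip #11847).
        downloaded[archive.name] = archive
        source_dir.mkdir(parents=True, exist_ok=True)
        with tarfile.open(archive) as tf:
            tf.extractall(source_dir)
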
From 957ad95c7d2ca70077a61a3adb551824818f929b Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Tue, 1 Aug 2023 21:18:15 -0400
Subject: [PATCH 4/6] add news entry
---
news/12191.bugfix.rst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 news/12191.bugfix.rst
diff --git a/news/12191.bugfix.rst b/news/12191.bugfix.rst
new file mode 100644
index 00000000000..1f384835fef
--- /dev/null
+++ b/news/12191.bugfix.rst
@@ -0,0 +1 @@
+Prevent downloading sdists twice when PEP 658 metadata is present.
From bfa8a5532d45815d2229ba2e2a920fde6bffc800 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Thu, 3 Aug 2023 05:50:37 -0400
Subject: [PATCH 5/6] clean up duplicated code
---
src/pip/_internal/operations/prepare.py | 54 +++++--------------------
src/pip/_internal/req/req_install.py | 29 ++++++++++++-
tests/conftest.py | 52 +++++++++++++-----------
3 files changed, 68 insertions(+), 67 deletions(-)
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index 81bf48fbb75..1b32d7eec3e 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -7,7 +7,8 @@
import mimetypes
import os
import shutil
-from typing import Dict, Iterable, List, Optional, Set
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional
from pip._vendor.packaging.utils import canonicalize_name
@@ -20,7 +21,6 @@
InstallationError,
MetadataInconsistent,
NetworkConnectionError,
- PreviousBuildDirError,
VcsHashUnsupported,
)
from pip._internal.index.package_finder import PackageFinder
@@ -47,7 +47,6 @@
display_path,
hash_file,
hide_url,
- is_installable_dir,
)
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.unpacking import unpack_file
@@ -319,21 +318,7 @@ def _ensure_link_req_src_dir(
autodelete=True,
parallel_builds=parallel_builds,
)
-
- # If a checkout exists, it's unwise to keep going. version
- # inconsistencies are logged later, but do not fail the
- # installation.
- # FIXME: this won't upgrade when there's an existing
- # package unpacked in `req.source_dir`
- # TODO: this check is now probably dead code
- if is_installable_dir(req.source_dir):
- raise PreviousBuildDirError(
- "pip can't proceed with requirements '{}' due to a"
- "pre-existing build directory ({}). This is likely "
- "due to a previous installation that failed . pip is "
- "being responsible and not assuming it can delete this. "
- "Please delete it and try again.".format(req, req.source_dir)
- )
+ req.ensure_pristine_source_checkout()
def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes:
# By the time this is called, the requirement's link should have
@@ -474,8 +459,6 @@ def _complete_partial_requirements(
assert req.link
links_to_fully_download[req.link] = req
- reqs_with_newly_unpacked_source_dirs: Set[Link] = set()
-
batch_download = self._batch_download(
links_to_fully_download.keys(),
temp_dir,
@@ -490,28 +473,17 @@ def _complete_partial_requirements(
# _prepare_linked_requirement().
self._downloaded[req.link.url] = filepath
- # If this is an sdist, we need to unpack it and set the .source_dir
- # immediately after downloading, as _prepare_linked_requirement() assumes
- # the req is either not downloaded at all, or both downloaded and
- # unpacked. The downloading and unpacking is typically done with
- # unpack_url(), but we separate the downloading and unpacking steps here in
- # order to use the BatchDownloader.
+ # If this is an sdist, we need to unpack it after downloading, but the
+ # .source_dir won't be set up until we are in _prepare_linked_requirement().
+ # Add the downloaded archive to the install requirement to unpack after
+ # preparing the source dir.
if not req.is_wheel:
- hashes = self._get_linked_req_hashes(req)
- assert filepath == _check_download_dir(req.link, temp_dir, hashes)
- self._ensure_link_req_src_dir(req, parallel_builds)
- unpack_file(filepath, req.source_dir)
- reqs_with_newly_unpacked_source_dirs.add(req.link)
+ req.needs_unpacked_archive(Path(filepath))
# This step is necessary to ensure all lazy wheels are processed
# successfully by the 'download', 'wheel', and 'install' commands.
for req in partially_downloaded_reqs:
- self._prepare_linked_requirement(
- req,
- parallel_builds,
- source_dir_exists_already=req.link
- in reqs_with_newly_unpacked_source_dirs,
- )
+ self._prepare_linked_requirement(req, parallel_builds)
def prepare_linked_requirement(
self, req: InstallRequirement, parallel_builds: bool = False
@@ -582,10 +554,7 @@ def prepare_linked_requirements_more(
)
def _prepare_linked_requirement(
- self,
- req: InstallRequirement,
- parallel_builds: bool,
- source_dir_exists_already: bool = False,
+ self, req: InstallRequirement, parallel_builds: bool
) -> BaseDistribution:
assert req.link
link = req.link
@@ -617,8 +586,7 @@ def _prepare_linked_requirement(
req.link = req.cached_wheel_source_link
link = req.link
- if not source_dir_exists_already:
- self._ensure_link_req_src_dir(req, parallel_builds)
+ self._ensure_link_req_src_dir(req, parallel_builds)
if link.is_existing_dir():
local_file = None
diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py
index 542d6c78f96..614c6de9c3d 100644
--- a/src/pip/_internal/req/req_install.py
+++ b/src/pip/_internal/req/req_install.py
@@ -6,6 +6,7 @@
import uuid
import zipfile
from optparse import Values
+from pathlib import Path
from typing import Any, Collection, Dict, Iterable, List, Optional, Sequence, Union
from pip._vendor.packaging.markers import Marker
@@ -17,7 +18,7 @@
from pip._vendor.pyproject_hooks import BuildBackendHookCaller
from pip._internal.build_env import BuildEnvironment, NoOpBuildEnvironment
-from pip._internal.exceptions import InstallationError
+from pip._internal.exceptions import InstallationError, PreviousBuildDirError
from pip._internal.locations import get_scheme
from pip._internal.metadata import (
BaseDistribution,
@@ -47,11 +48,13 @@
backup_dir,
display_path,
hide_url,
+ is_installable_dir,
redact_auth_from_url,
)
from pip._internal.utils.packaging import safe_extra
from pip._internal.utils.subprocess import runner_with_spinner_message
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
+from pip._internal.utils.unpacking import unpack_file
from pip._internal.utils.virtualenv import running_under_virtualenv
from pip._internal.vcs import vcs
@@ -180,6 +183,9 @@ def __init__(
# This requirement needs more preparation before it can be built
self.needs_more_preparation = False
+ # This requirement needs to be unpacked before it can be installed.
+ self._archive_source: Optional[Path] = None
+
def __str__(self) -> str:
if self.req:
s = str(self.req)
@@ -645,6 +651,27 @@ def ensure_has_source_dir(
parallel_builds=parallel_builds,
)
+ def needs_unpacked_archive(self, archive_source: Path) -> None:
+ assert self._archive_source is None
+ self._archive_source = archive_source
+
+ def ensure_pristine_source_checkout(self) -> None:
+ """Ensure the source directory has not yet been built in."""
+ assert self.source_dir is not None
+ if self._archive_source is not None:
+ unpack_file(str(self._archive_source), self.source_dir)
+ elif is_installable_dir(self.source_dir):
+ # If a checkout exists, it's unwise to keep going.
+ # Version inconsistencies are logged later, but do not fail
+ # the installation.
+ raise PreviousBuildDirError(
+ "pip can't proceed with requirements '{}' due to a "
+ "pre-existing build directory ({}). This is likely "
+ "due to a previous installation that failed . pip is "
+ "being responsible and not assuming it can delete this. "
+ "Please delete it and try again.".format(self, self.source_dir)
+ )
+
# For editable installations
def update_editable(self) -> None:
if not self.link:
diff --git a/tests/conftest.py b/tests/conftest.py
index 25581af9a45..cd9931c66d9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -935,17 +935,21 @@ def html_index_for_packages(
pkg_links = "\n".join(
f'    <a href="{pkg}/index.html">{pkg}</a>' for pkg in fake_packages.keys()
)
- index_html = f"""\
-
-
-
-
- Simple index
-
-
-{pkg_links}
-
-"""
+ # Output won't be nicely indented because dedent() acts after f-string
+ # arg insertion.
+ index_html = dedent(
+     f"""\
+     <!DOCTYPE html>
+     <html>
+       <head>
+         <meta name="pypi:repository-version" content="1.0">
+         <title>Simple index</title>
+       </head>
+       <body>
+     {pkg_links}
+       </body>
+     </html>"""
+ )
# (2) Generate the index.html in a new subdirectory of the temp directory.
(html_dir / "index.html").write_text(index_html)
@@ -976,18 +980,20 @@ def html_index_for_packages(
# write an index.html with the generated download links for each
# copied file for this specific package name.
download_links_str = "\n".join(download_links)
- pkg_index_content = f"""\
-
-
-
-
- Links for {pkg}
-
-
- Links for {pkg}
-{download_links_str}
-
-"""
+ pkg_index_content = dedent(
+     f"""\
+     <!DOCTYPE html>
+     <html>
+       <head>
+         <meta name="pypi:repository-version" content="1.0">
+         <title>Links for {pkg}</title>
+       </head>
+       <body>
+         <h1>Links for {pkg}</h1>
+     {download_links_str}
+       </body>
+     </html>"""
+ )
with open(pkg_subdir / "index.html", "w") as f:
f.write(pkg_index_content)
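
The caveat in the new comment, that dedent() acts only after f-string argument
insertion, is easy to reproduce in isolation (a minimal stdlib-only
demonstration):

    from textwrap import dedent

    links = "\n".join(f'<a href="{p}/">{p}</a>' for p in ("simple", "colander"))
    html = dedent(
        f"""\
        <body>
        {links}
        </body>
        """
    )
    print(html)
    # Substitution happens first, so only the first <a> inherits the surrounding
    # indentation; the second starts at column 0. That unindented line leaves
    # dedent() with no common leading whitespace to strip, hence the "won't be
    # nicely indented" note above.
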
From 39da6e051a30e90b12608ca90e96e554d82fd15f Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Mon, 14 Aug 2023 07:55:55 -0400
Subject: [PATCH 6/6] use f-string in exception message
---
src/pip/_internal/req/req_install.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py
index 614c6de9c3d..8110114ca14 100644
--- a/src/pip/_internal/req/req_install.py
+++ b/src/pip/_internal/req/req_install.py
@@ -665,11 +665,11 @@ def ensure_pristine_source_checkout(self) -> None:
# Version inconsistencies are logged later, but do not fail
# the installation.
raise PreviousBuildDirError(
- "pip can't proceed with requirements '{}' due to a "
- "pre-existing build directory ({}). This is likely "
+ f"pip can't proceed with requirements '{self}' due to a "
+ f"pre-existing build directory ({self.source_dir}). This is likely "
"due to a previous installation that failed . pip is "
"being responsible and not assuming it can delete this. "
- "Please delete it and try again.".format(self, self.source_dir)
+ "Please delete it and try again."
)
# For editable installations