diff --git a/README.md b/README.md index ece49b3..f432208 100644 --- a/README.md +++ b/README.md @@ -937,27 +937,20 @@ curl https://artifacts.example.com/ | jq '.remotes' ## Python Package Proxy with uv -The `pypi` package type turns the artifact API into a caching PyPI proxy. Simple index pages (`/simple/{package}/`) are mutable and expire after `mutable_ttl`; package files (wheels, sdists, metadata) are immutable and cached forever. URLs in the simple index HTML are rewritten on the fly to point back through the proxy, so both the index lookup and the file download are served from cache. +The `pypi` package type turns the artifact API into a caching PyPI proxy using a single remote. Simple index pages (`/simple/{package}/`) are mutable and expire after `mutable_ttl`; package files (wheels, sdists, metadata) are immutable and cached forever. URLs in the simple index HTML are rewritten on the fly to point back through the same remote, so both the index lookup and the file download are served from cache. + +For public PyPI, `base_url` is set to `https://files.pythonhosted.org` (the file host). Simple index requests are transparently fetched from `https://pypi.org` — no extra configuration needed. For self-hosted registries like Gitea where both index and files share the same host, `base_url` points at that host and everything routes through it automatically. ### remotes.yaml ```yaml remotes: + # Public PyPI — simple index fetched from pypi.org, files from files.pythonhosted.org pypi: - base_url: "https://pypi.org" - type: "remote" - package: "pypi" - pypi_files_url: "https://files.pythonhosted.org" # host to rewrite in index HTML - pypi_files_remote: "pypi-files" # our proxy remote to replace it with - check_mutable_updates: true - cache: - immutable_ttl: 0 - mutable_ttl: 600 # re-check simple indexes after 10 minutes - - pypi-files: base_url: "https://files.pythonhosted.org" type: "remote" - package: "generic" + package: "pypi" + check_mutable_updates: true immutable_patterns: - "packages/.*\\.whl$" - "packages/.*\\.whl\\.metadata$" @@ -965,17 +958,16 @@ remotes: - "packages/.*\\.zip$" - "packages/.*\\.egg$" cache: - immutable_ttl: 0 # package files are content-addressed — cache forever + immutable_ttl: 0 + mutable_ttl: 600 # re-check simple indexes after 10 minutes - # Self-hosted Gitea PyPI registry (index and files share the same base URL) + # Self-hosted Gitea PyPI registry — index and files at the same base URL pypi-gitea: base_url: "https://gitea.example.com/api/packages/myorg/pypi" type: "remote" package: "pypi" # username: "your-gitea-username" # password: "your-personal-access-token" # needs package:read scope - pypi_files_url: "https://gitea.example.com/api/packages/myorg/pypi" - pypi_files_remote: "pypi-gitea" # point back to itself — Gitea serves both index and files check_mutable_updates: true immutable_patterns: - "files/.*\\.whl$" @@ -1028,12 +1020,10 @@ Setting `default = true` replaces uv's built-in PyPI index. The first install of When uv requests the simple index for a package, the proxy: 1. Fetches `https://pypi.org/simple/{package}/` (or returns a valid cached copy within `mutable_ttl`) -2. Rewrites every `https://files.pythonhosted.org/...` href to `https://artifacts.example.com/api/v1/remote/pypi-files/...` +2. Rewrites every `https://files.pythonhosted.org/...` href to `https://artifacts.example.com/api/v1/remote/pypi/...` 3. Returns the rewritten HTML to uv -uv then downloads wheels and `.whl.metadata` files via the rewritten URLs, which also pass through the proxy and are cached as immutable artifacts. - -For self-hosted registries like Gitea, both the index and file downloads share the same base URL. Setting `pypi_files_url` and `pypi_files_remote` to the same remote causes file links to be rewritten back through the same proxy entry. +uv then downloads wheels and `.whl.metadata` files via the rewritten URLs, which also pass through the same proxy remote and are cached as immutable artifacts. For Gitea and other self-hosted registries, the same mechanism applies — `base_url` is the file host, and index page hrefs pointing at that host are rewritten to the proxy. ## npm Package Proxy @@ -1047,8 +1037,6 @@ remotes: base_url: "https://registry.npmjs.org" type: "remote" package: "npm" - npm_files_url: "https://registry.npmjs.org" # URL prefix to rewrite in metadata JSON - npm_files_remote: "npm" # rewrite back to this same remote check_mutable_updates: true immutable_patterns: - "\.tgz$" # versioned tarballs are content-addressed — cache forever diff --git a/remotes.yaml b/remotes.yaml index 33b3057..f0a5836 100644 --- a/remotes.yaml +++ b/remotes.yaml @@ -195,15 +195,19 @@ remotes: mutable_ttl: 300 pypi: - base_url: "https://pypi.org" + base_url: "https://files.pythonhosted.org" type: "remote" package: "pypi" - description: "Python Package Index — simple repository API" - # pypi_files_url: the upstream host used in simple-index hrefs (default: files.pythonhosted.org) - # pypi_files_remote: our proxy remote that will serve those files (default: pypi-files) - pypi_files_url: "https://files.pythonhosted.org" - pypi_files_remote: "pypi-files" + description: "Python Package Index — simple index and package files via a single remote" + # simple/ requests are transparently fetched from pypi.org; package files come from + # files.pythonhosted.org (base_url). URLs in the simple index are rewritten to this remote. check_mutable_updates: true + immutable_patterns: + - "packages/.*\\.whl$" + - "packages/.*\\.whl\\.metadata$" + - "packages/.*\\.tar\\.gz$" + - "packages/.*\\.zip$" + - "packages/.*\\.egg$" cache: immutable_ttl: 0 mutable_ttl: 600 # Simple index pages refreshed after 10 minutes @@ -212,12 +216,9 @@ remotes: base_url: "https://gitea.example.com/api/packages/myorg/pypi" type: "remote" package: "pypi" - description: "Private Gitea PyPI registry" + description: "Private Gitea PyPI registry — simple index and files at the same host" # username: "your-gitea-username" # password: "your-personal-access-token" # needs package:read scope - # Files are served from the same Gitea instance — rewrite back to this same remote - pypi_files_url: "https://gitea.example.com/api/packages/myorg/pypi" - pypi_files_remote: "pypi-gitea" check_mutable_updates: true immutable_patterns: - "files/.*\\.whl$" @@ -229,29 +230,11 @@ remotes: immutable_ttl: 0 mutable_ttl: 600 - pypi-files: - base_url: "https://files.pythonhosted.org" - type: "remote" - package: "generic" - description: "Python Package Index — file storage (wheels, sdists)" - immutable_patterns: - - "packages/.*\\.whl$" - - "packages/.*\\.whl\\.metadata$" - - "packages/.*\\.tar\\.gz$" - - "packages/.*\\.zip$" - - "packages/.*\\.egg$" - cache: - immutable_ttl: 0 # Package files are content-addressed — cache forever - npm: base_url: "https://registry.npmjs.org" type: "remote" package: "npm" description: "npm registry — package metadata with tarball URL rewriting" - # npm_files_url: the upstream host used in metadata tarball hrefs (default: https://registry.npmjs.org) - # npm_files_remote: our proxy remote that will serve those tarballs (default: npm-files) - npm_files_url: "https://registry.npmjs.org" - npm_files_remote: "npm" check_mutable_updates: true immutable_patterns: - \.tgz$ diff --git a/src/artifactapi/main.py b/src/artifactapi/main.py index 9c00437..8816249 100644 --- a/src/artifactapi/main.py +++ b/src/artifactapi/main.py @@ -160,12 +160,13 @@ async def construct_remote_url(remote_name: str, path: str) -> str: if not base_url: raise HTTPException(status_code=500, detail=f"No base_url configured for remote '{remote_name}'") - # Handle Docker registry URLs if remote_config.get("package") == "docker": - # Convert Docker paths to v2 API format - # e.g., library/nginx/manifests/latest -> v2/library/nginx/manifests/latest return f"{base_url}/v2/{path}" + # PyPI splits index and files across two hosts; redirect simple/ requests to pypi.org + if remote_config.get("package") == "pypi" and base_url.rstrip("/") == "https://files.pythonhosted.org" and "simple/" in path: + return f"https://pypi.org/{path}" + return f"{base_url}/{path}" @@ -363,24 +364,26 @@ def _resolve_content( remote_name: str = "", ) -> tuple[bytes, str]: """Return (possibly-rewritten data, content_type) for a cached artifact.""" - if remote_config.get("package") == "pypi" and "simple/" in path: - files_url = remote_config.get("pypi_files_url", "https://files.pythonhosted.org") - files_remote = remote_config.get("pypi_files_remote", "pypi-files") - proxy_base = str(request.base_url).rstrip("/") - data = data.replace( - files_url.rstrip("/").encode(), - f"{proxy_base}/api/v1/remote/{files_remote}".encode(), - ) - return data, "text/html; charset=utf-8" - if remote_config.get("package") == "npm" and not path.endswith(".tgz"): - files_url = remote_config.get("npm_files_url", "https://registry.npmjs.org") - files_remote = remote_config.get("npm_files_remote", "npm-files") - proxy_base = str(request.base_url).rstrip("/") - data = data.replace( - files_url.rstrip("/").encode(), - f"{proxy_base}/api/v1/remote/{files_remote}".encode(), - ) - return data, "application/json" + if remote_config.get("package") == "pypi": + immutable = remote_config.get("immutable_patterns", []) + if not any(re.search(p, path) for p in immutable): + proxy_base = str(request.base_url).rstrip("/") + base_url = remote_config.get("base_url", "").rstrip("/") + data = data.replace( + base_url.encode(), + f"{proxy_base}/api/v1/remote/{remote_name}".encode(), + ) + return data, "text/html; charset=utf-8" + if remote_config.get("package") == "npm": + immutable = remote_config.get("immutable_patterns", []) + if not any(re.search(p, path) for p in immutable): + proxy_base = str(request.base_url).rstrip("/") + base_url = remote_config.get("base_url", "").rstrip("/") + data = data.replace( + base_url.encode(), + f"{proxy_base}/api/v1/remote/{remote_name}".encode(), + ) + return data, "application/json" if remote_config.get("package") == "helm" and filename == "index.yaml": proxy_base = str(request.base_url).rstrip("/") base_url = remote_config.get("base_url", "").rstrip("/") diff --git a/tests/conftest.py b/tests/conftest.py index dc259e5..1fada7e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,30 +73,20 @@ TEST_REMOTES = { "cache": {"immutable_ttl": 0, "mutable_ttl": 0}, }, "pypi-test": { - "base_url": "https://pypi.org", - "type": "remote", - "package": "pypi", - "pypi_files_url": "https://files.pythonhosted.org", - "pypi_files_remote": "pypi-files-test", - "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, - }, - "pypi-files-test": { "base_url": "https://files.pythonhosted.org", "type": "remote", - "package": "generic", + "package": "pypi", "immutable_patterns": [ - "packages/.*\\.whl$", - "packages/.*\\.whl\\.metadata$", - "packages/.*\\.tar\\.gz$", + r"packages/.*\.whl$", + r"packages/.*\.whl\.metadata$", + r"packages/.*\.tar\.gz$", ], - "cache": {"immutable_ttl": 0, "mutable_ttl": 0}, + "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, }, "npm-test": { "base_url": "https://registry.npmjs.org", "type": "remote", "package": "npm", - "npm_files_url": "https://registry.npmjs.org", - "npm_files_remote": "npm-test", "immutable_patterns": [r"\.tgz$"], "mutable_patterns": [r"^(?!.*\.tgz$).*"], "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, diff --git a/tests/test_routes.py b/tests/test_routes.py index 3c723a1..dbe4815 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -685,7 +685,7 @@ class TestPyPIRemote: response = client.get("/api/v1/remote/pypi-test/simple/requests/") assert response.status_code == 200 assert b"files.pythonhosted.org" not in response.content - assert b"/api/v1/remote/pypi-files-test/packages/requests-2.31.0.tar.gz" in response.content + assert b"/api/v1/remote/pypi-test/packages/requests-2.31.0.tar.gz" in response.content def test_simple_index_content_type_is_html(self, client, patched_deps): deps = patched_deps @@ -722,7 +722,7 @@ class TestPyPIRemote: deps["storage"].download_object.return_value = b"PK wheel bytes" deps["cache"].is_mutable_file.return_value = False - response = client.get("/api/v1/remote/pypi-files-test/packages/requests-2.31.0-py3-none-any.whl") + response = client.get("/api/v1/remote/pypi-test/packages/requests-2.31.0-py3-none-any.whl") assert response.status_code == 200 assert "application/zip" in response.headers["content-type"] assert response.headers["X-Artifact-Source"] == "cache" @@ -733,13 +733,13 @@ class TestPyPIRemote: deps["storage"].download_object.return_value = b"tar bytes" deps["cache"].is_mutable_file.return_value = False - response = client.get("/api/v1/remote/pypi-files-test/packages/requests-2.31.0.tar.gz") + response = client.get("/api/v1/remote/pypi-test/packages/requests-2.31.0.tar.gz") assert response.status_code == 200 assert "application/gzip" in response.headers["content-type"] - def test_blocked_path_on_files_remote_returns_403(self, client, patched_deps): - """Paths that don't match immutable_patterns on pypi-files-test are blocked.""" - response = client.get("/api/v1/remote/pypi-files-test/packages/requests.unknown") + def test_unknown_extension_on_pypi_remote_returns_403(self, client, patched_deps): + """Paths that don't match immutable_patterns and aren't mutable are blocked.""" + response = client.get("/api/v1/remote/pypi-test/packages/requests.unknown") assert response.status_code == 403