From 4ca89b9159e9a8023242e9b0ec5be8ae76e25408 Mon Sep 17 00:00:00 2001 From: Ben Vincent Date: Mon, 27 Apr 2026 22:17:31 +1000 Subject: [PATCH] feat: add helm chart repository caching proxy - Add helm package type with index.yaml as mutable (TTL-based) and .tgz chart tarballs as immutable - Rewrite chart URLs in index.yaml to serve tarballs via proxy cache - Add text/yaml content-type detection for .yaml/.yml files - Add hashicorp-helm example remote in remotes.yaml - Update README with Helm chart repository proxy section - Add tests for helm mutable patterns and route behaviour --- README.md | 51 +++++++++++++++++++++++- remotes.yaml | 12 ++++++ src/artifactapi/config.py | 3 ++ src/artifactapi/main.py | 15 ++++++- tests/conftest.py | 7 ++++ tests/test_config.py | 14 +++++++ tests/test_routes.py | 83 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 182 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index adea4e8..ece49b3 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ A generic FastAPI-based artifact caching system that downloads and stores files - **S3 Storage**: MinIO/S3 backend with predictable paths - **Docker Registry Proxy**: Full Docker Registry HTTP API v2 for transparent container image caching - **npm Package Proxy**: Caching proxy for the npm registry with metadata URL rewriting so tarballs also pass through cache +- **Helm Chart Repository Proxy**: Caching proxy for Helm chart repositories with `index.yaml` URL rewriting so chart tarballs also pass through cache - **Content-Type Detection**: Automatic MIME type detection for downloads ## Architecture @@ -1096,4 +1097,52 @@ The client then downloads the tarball via the rewritten URL, which hits the same | `/@{scope}/{package}` | Mutable (TTL) | `/@babel/core` | | `/-/all` | Mutable (TTL) | `/-/all` | | `/{package}/-/{package}-{version}.tgz` | Immutable (forever) | `/express/-/express-4.18.2.tgz` | -| `/@{scope}/{pkg}/-/{pkg}-{ver}.tgz` | Immutable (forever) | 
`/@babel/core/-/core-7.21.0.tgz` | \ No newline at end of file +| `/@{scope}/{pkg}/-/{pkg}-{ver}.tgz` | Immutable (forever) | `/@babel/core/-/core-7.21.0.tgz` | + +## Helm Chart Repository Proxy + +The `helm` package type turns the artifact API into a caching Helm chart repository proxy. A single remote handles both the mutable `index.yaml` and the immutable versioned chart tarballs, since they are served from the same upstream host. Chart URLs inside `index.yaml` are rewritten on the fly to point back through the same remote, so both the index lookup and the chart download are served from cache. + +### remotes.yaml + +```yaml +remotes: + hashicorp-helm: + base_url: "https://helm.releases.hashicorp.com" + type: "remote" + package: "helm" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" # chart tarballs — cache forever + cache: + immutable_ttl: 0 + mutable_ttl: 3600 # index.yaml refreshed after 1 hour +``` + +### Configuring Helm + +Point Helm at the proxy with `helm repo add`: + +```bash +helm repo add hashicorp https://artifacts.example.com/api/v1/remote/hashicorp-helm +helm repo update +helm search repo hashicorp/vault +helm install vault hashicorp/vault +``` + +### How the rewriting works + +When a client requests `index.yaml`, the proxy: + +1. Fetches `https://helm.releases.hashicorp.com/index.yaml` (or returns a cached copy within `mutable_ttl`) +2. Rewrites every occurrence of the remote's `base_url` (`https://helm.releases.hashicorp.com`) in the document, which covers the chart URLs, to `{proxy}/api/v1/remote/hashicorp-helm`, where `{proxy}` is the base URL of the incoming request (e.g. `https://artifacts.example.com`) +3. Returns the rewritten YAML to the client + +The client then downloads chart tarballs via the rewritten URLs, which hit the same `hashicorp-helm` remote and are cached as immutable artifacts. Subsequent installs of the same chart version are served entirely from S3.
+ +### Mutable vs immutable paths + +| Path | Type | Example | +|---|---|---| +| `index.yaml` | Mutable (TTL) | `index.yaml` | +| `{chart}-{version}.tgz` | Immutable (forever) | `vault-0.29.1.tgz` | \ No newline at end of file diff --git a/remotes.yaml b/remotes.yaml index a55359f..33b3057 100644 --- a/remotes.yaml +++ b/remotes.yaml @@ -261,6 +261,18 @@ remotes: immutable_ttl: 0 mutable_ttl: 600 # Package metadata refreshed after 10 minutes + hashicorp-helm: + base_url: "https://helm.releases.hashicorp.com" + type: "remote" + package: "helm" + description: "HashiCorp Helm chart repository (Vault, Consul, Nomad, etc.)" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 # Chart tarballs are versioned — cache forever + mutable_ttl: 3600 # index.yaml refreshed after 1 hour + local-generic: type: "local" package: "generic" diff --git a/src/artifactapi/config.py b/src/artifactapi/config.py index 1d4b330..ab1d902 100644 --- a/src/artifactapi/config.py +++ b/src/artifactapi/config.py @@ -22,6 +22,9 @@ _PACKAGE_MUTABLE_PATTERNS: dict[str, list[str]] = { r"simple/", # Per-package and top-level simple index pages ], "npm": [], + "helm": [ + r"index\.yaml$", + ], "generic": [], } diff --git a/src/artifactapi/main.py b/src/artifactapi/main.py index cac6e61..9c00437 100644 --- a/src/artifactapi/main.py +++ b/src/artifactapi/main.py @@ -349,6 +349,8 @@ def _get_content_type(filename: str) -> str: return "application/xml" if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")): return "application/gzip" + if filename.endswith((".yaml", ".yml")): + return "text/yaml" return "application/octet-stream" @@ -358,6 +360,7 @@ def _resolve_content( filename: str, remote_config: dict, request: Request, + remote_name: str = "", ) -> tuple[bytes, str]: """Return (possibly-rewritten data, content_type) for a cached artifact.""" if remote_config.get("package") == "pypi" and "simple/" in path: @@ -378,6 +381,14 @@ def _resolve_content( 
f"{proxy_base}/api/v1/remote/{files_remote}".encode(), ) return data, "application/json" + if remote_config.get("package") == "helm" and filename == "index.yaml": + proxy_base = str(request.base_url).rstrip("/") + base_url = remote_config.get("base_url", "").rstrip("/") + data = data.replace( + base_url.encode(), + f"{proxy_base}/api/v1/remote/{remote_name}".encode(), + ) + return data, "text/yaml" return data, _get_content_type(filename) @@ -445,7 +456,7 @@ async def get_artifact(request: Request, remote_name: str, path: str): try: artifact_data = storage.download_object(cached_key) filename = os.path.basename(path) - artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request) + artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name) logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})") @@ -486,7 +497,7 @@ async def get_artifact(request: Request, remote_name: str, path: str): cache_key = storage.get_object_key(remote_name, path) artifact_data = storage.download_object(cache_key) filename = os.path.basename(path) - artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request) + artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name) metrics.record_cache_miss(remote_name, len(artifact_data)) cache_key = storage.get_object_key(remote_name, path) diff --git a/tests/conftest.py b/tests/conftest.py index 4500815..dc259e5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -101,6 +101,13 @@ TEST_REMOTES = { "mutable_patterns": [r"^(?!.*\.tgz$).*"], "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, }, + "helm-test": { + "base_url": "https://helm.releases.hashicorp.com", + "type": "remote", + "package": "helm", + "immutable_patterns": [r"\.tgz$"], + "cache": {"immutable_ttl": 0, "mutable_ttl": 3600}, + }, } } diff --git 
a/tests/test_config.py b/tests/test_config.py index 147b62c..594c6cf 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -154,6 +154,20 @@ class TestGetMutablePatterns: assert any(re.search(p, "express") for p in patterns) assert any(re.search(p, "@babel/core") for p in patterns) + def test_helm_returns_index_yaml_as_mutable(self, make_config): + cfg = make_config({"r": {"type": "remote", "package": "helm", "base_url": "https://helm.example.com"}}) + patterns = cfg.get_mutable_patterns("r") + assert r"index\.yaml$" in patterns + + def test_helm_chart_tarballs_not_mutable_by_default(self, make_config): + import re + + cfg = make_config({"r": {"type": "remote", "package": "helm", "base_url": "https://helm.example.com"}}) + patterns = cfg.get_mutable_patterns("r") + # Only index.yaml is mutable; .tgz chart tarballs are not + assert not any(re.search(p, "vault-0.29.1.tgz") for p in patterns) + assert not any(re.search(p, "consul-1.5.0.tgz") for p in patterns) + def test_npm_explicit_mutable_pattern_excludes_tarballs(self, make_config): import re diff --git a/tests/test_routes.py b/tests/test_routes.py index 1773a7e..3c723a1 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -841,3 +841,86 @@ class TestNpmRemote: response = client.get("/api/v1/remote/npm-test/express/-/express-4.18.2.tgz") assert response.status_code == 200 assert "application/gzip" in response.headers["content-type"] + + +# --------------------------------------------------------------------------- +# Helm remote /api/v1/remote/helm-test/... 
+# --------------------------------------------------------------------------- + + +class TestHelmRemote: + def test_index_yaml_is_mutable(self, client, patched_deps): + """index.yaml is detected as mutable (package-type default).""" + deps = patched_deps + index = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n" + deps["storage"].exists.return_value = True + deps["storage"].download_object.return_value = index + deps["cache"].is_mutable_file.return_value = True + deps["cache"].is_index_valid.return_value = True + + response = client.get("/api/v1/remote/helm-test/index.yaml") + assert response.status_code == 200 + deps["cache"].mark_index_cached.assert_not_called() + + def test_index_yaml_urls_rewritten_to_proxy(self, client, patched_deps): + """base_url chart URLs in a cached index.yaml are rewritten to our proxy.""" + deps = patched_deps + index = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n" + deps["storage"].exists.return_value = True + deps["storage"].download_object.return_value = index + deps["cache"].is_mutable_file.return_value = True + deps["cache"].is_index_valid.return_value = True + + response = client.get("/api/v1/remote/helm-test/index.yaml") + assert response.status_code == 200 + assert b"helm.releases.hashicorp.com" not in response.content + assert b"/api/v1/remote/helm-test/vault-0.29.1.tgz" in response.content + + def test_index_yaml_content_type_is_yaml(self, client, patched_deps): + deps = patched_deps + deps["storage"].exists.return_value = True + deps["storage"].download_object.return_value = b"apiVersion: v1\nentries: {}\n" + deps["cache"].is_mutable_file.return_value = True + deps["cache"].is_index_valid.return_value = True + + response = client.get("/api/v1/remote/helm-test/index.yaml") + assert response.status_code == 200 + assert "text/yaml" in response.headers["content-type"] + + def 
test_chart_tarball_immutable_returns_gzip_content_type(self, client, patched_deps): + """Versioned chart tarballs match immutable_patterns and are served as binary.""" + deps = patched_deps + deps["storage"].exists.return_value = True + deps["storage"].download_object.return_value = b"\x1f\x8b chart bytes" + deps["cache"].is_mutable_file.return_value = False + + response = client.get("/api/v1/remote/helm-test/vault-0.29.1.tgz") + assert response.status_code == 200 + assert "application/gzip" in response.headers["content-type"] + assert response.headers["X-Artifact-Source"] == "cache" + + def test_index_yaml_cache_miss_fetches_upstream(self, client, patched_deps): + deps = patched_deps + index = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n" + deps["storage"].exists.return_value = False + deps["storage"].download_object.return_value = index + deps["cache"].is_mutable_file.return_value = True + + with patch( + "artifactapi.main.cache_single_artifact", + new_callable=AsyncMock, + return_value={"status": "cached"}, + ) as mock_fetch: + response = client.get("/api/v1/remote/helm-test/index.yaml") + + mock_fetch.assert_called_once() + assert response.status_code == 200 + assert b"helm.releases.hashicorp.com" not in response.content + + def test_non_tgz_non_yaml_path_blocked_by_pattern(self, client, patched_deps): + """Paths that don't match immutable_patterns and aren't mutable are blocked.""" + deps = patched_deps + deps["cache"].is_mutable_file.return_value = False + + response = client.get("/api/v1/remote/helm-test/vault.zip") + assert response.status_code == 403