Merge pull request 'feat: add helm chart repository caching proxy' (#17) from benvin/helm-remote into master
ci/woodpecker/tag/docker Pipeline was successful
ci/woodpecker/tag/docker Pipeline was successful
Reviewed-on: #17
This commit was merged in pull request #17.
This commit is contained in:
@@ -14,6 +14,7 @@ A generic FastAPI-based artifact caching system that downloads and stores files
|
|||||||
- **S3 Storage**: MinIO/S3 backend with predictable paths
|
- **S3 Storage**: MinIO/S3 backend with predictable paths
|
||||||
- **Docker Registry Proxy**: Full Docker Registry HTTP API v2 for transparent container image caching
|
- **Docker Registry Proxy**: Full Docker Registry HTTP API v2 for transparent container image caching
|
||||||
- **npm Package Proxy**: Caching proxy for the npm registry with metadata URL rewriting so tarballs also pass through cache
|
- **npm Package Proxy**: Caching proxy for the npm registry with metadata URL rewriting so tarballs also pass through cache
|
||||||
|
- **Helm Chart Repository Proxy**: Caching proxy for Helm chart repositories with `index.yaml` URL rewriting so chart tarballs also pass through cache
|
||||||
- **Content-Type Detection**: Automatic MIME type detection for downloads
|
- **Content-Type Detection**: Automatic MIME type detection for downloads
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
@@ -1097,3 +1098,51 @@ The client then downloads the tarball via the rewritten URL, which hits the same
|
|||||||
| `/-/all` | Mutable (TTL) | `/-/all` |
|
| `/-/all` | Mutable (TTL) | `/-/all` |
|
||||||
| `/{package}/-/{package}-{version}.tgz` | Immutable (forever) | `/express/-/express-4.18.2.tgz` |
|
| `/{package}/-/{package}-{version}.tgz` | Immutable (forever) | `/express/-/express-4.18.2.tgz` |
|
||||||
| `/@{scope}/{pkg}/-/{pkg}-{ver}.tgz` | Immutable (forever) | `/@babel/core/-/core-7.21.0.tgz` |
|
| `/@{scope}/{pkg}/-/{pkg}-{ver}.tgz` | Immutable (forever) | `/@babel/core/-/core-7.21.0.tgz` |
|
||||||
|
|
||||||
|
## Helm Chart Repository Proxy
|
||||||
|
|
||||||
|
The `helm` package type turns the artifact API into a caching Helm chart repository proxy. A single remote handles both the mutable `index.yaml` and the immutable versioned chart tarballs, since they are served from the same upstream host. Chart URLs inside `index.yaml` are rewritten on the fly to point back through the same remote, so both the index lookup and the chart download are served from cache.
|
||||||
|
|
||||||
|
### remotes.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
remotes:
|
||||||
|
hashicorp-helm:
|
||||||
|
base_url: "https://helm.releases.hashicorp.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "helm"
|
||||||
|
check_mutable_updates: true
|
||||||
|
immutable_patterns:
|
||||||
|
- "\\.tgz$" # chart tarballs — cache forever
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 3600 # index.yaml refreshed after 1 hour
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuring Helm
|
||||||
|
|
||||||
|
Point Helm at the proxy with `helm repo add`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
helm repo add hashicorp https://artifacts.example.com/api/v1/remote/hashicorp-helm
|
||||||
|
helm repo update
|
||||||
|
helm search repo hashicorp/vault
|
||||||
|
helm install vault hashicorp/vault
|
||||||
|
```
|
||||||
|
|
||||||
|
### How the rewriting works
|
||||||
|
|
||||||
|
When a client requests `index.yaml`, the proxy:
|
||||||
|
|
||||||
|
1. Fetches `https://helm.releases.hashicorp.com/index.yaml` (or returns a cached copy within `mutable_ttl`)
|
||||||
|
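2. Rewrites every occurrence of the upstream base URL `https://helm.releases.hashicorp.com` to `https://artifacts.example.com/api/v1/remote/hashicorp-helm`, so chart URLs such as `https://helm.releases.hashicorp.com/...` become `https://artifacts.example.com/api/v1/remote/hashicorp-helm/...`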
|
||||||
|
3. Returns the rewritten YAML to the client
|
||||||
|
|
||||||
|
The client then downloads chart tarballs via the rewritten URLs, which hit the same `hashicorp-helm` remote and are cached as immutable artifacts. Subsequent installs of the same chart version are served entirely from S3.
|
||||||
|
|
||||||
|
### Mutable vs immutable paths
|
||||||
|
|
||||||
|
| Path | Type | Example |
|
||||||
|
|---|---|---|
|
||||||
|
| `index.yaml` | Mutable (TTL) | `index.yaml` |
|
||||||
|
| `{chart}-{version}.tgz` | Immutable (forever) | `vault-0.29.1.tgz` |
|
||||||
@@ -261,6 +261,18 @@ remotes:
|
|||||||
immutable_ttl: 0
|
immutable_ttl: 0
|
||||||
mutable_ttl: 600 # Package metadata refreshed after 10 minutes
|
mutable_ttl: 600 # Package metadata refreshed after 10 minutes
|
||||||
|
|
||||||
|
hashicorp-helm:
|
||||||
|
base_url: "https://helm.releases.hashicorp.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "helm"
|
||||||
|
description: "HashiCorp Helm chart repository (Vault, Consul, Nomad, etc.)"
|
||||||
|
check_mutable_updates: true
|
||||||
|
immutable_patterns:
|
||||||
|
- "\\.tgz$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Chart tarballs are versioned — cache forever
|
||||||
|
mutable_ttl: 3600 # index.yaml refreshed after 1 hour
|
||||||
|
|
||||||
local-generic:
|
local-generic:
|
||||||
type: "local"
|
type: "local"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
|
|||||||
@@ -22,6 +22,9 @@ _PACKAGE_MUTABLE_PATTERNS: dict[str, list[str]] = {
|
|||||||
r"simple/", # Per-package and top-level simple index pages
|
r"simple/", # Per-package and top-level simple index pages
|
||||||
],
|
],
|
||||||
"npm": [],
|
"npm": [],
|
||||||
|
"helm": [
|
||||||
|
r"index\.yaml$",
|
||||||
|
],
|
||||||
"generic": [],
|
"generic": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+13
-2
@@ -349,6 +349,8 @@ def _get_content_type(filename: str) -> str:
|
|||||||
return "application/xml"
|
return "application/xml"
|
||||||
if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
||||||
return "application/gzip"
|
return "application/gzip"
|
||||||
|
if filename.endswith((".yaml", ".yml")):
|
||||||
|
return "text/yaml"
|
||||||
return "application/octet-stream"
|
return "application/octet-stream"
|
||||||
|
|
||||||
|
|
||||||
@@ -358,6 +360,7 @@ def _resolve_content(
|
|||||||
filename: str,
|
filename: str,
|
||||||
remote_config: dict,
|
remote_config: dict,
|
||||||
request: Request,
|
request: Request,
|
||||||
|
remote_name: str = "",
|
||||||
) -> tuple[bytes, str]:
|
) -> tuple[bytes, str]:
|
||||||
"""Return (possibly-rewritten data, content_type) for a cached artifact."""
|
"""Return (possibly-rewritten data, content_type) for a cached artifact."""
|
||||||
if remote_config.get("package") == "pypi" and "simple/" in path:
|
if remote_config.get("package") == "pypi" and "simple/" in path:
|
||||||
@@ -378,6 +381,14 @@ def _resolve_content(
|
|||||||
f"{proxy_base}/api/v1/remote/{files_remote}".encode(),
|
f"{proxy_base}/api/v1/remote/{files_remote}".encode(),
|
||||||
)
|
)
|
||||||
return data, "application/json"
|
return data, "application/json"
|
||||||
|
if remote_config.get("package") == "helm" and filename == "index.yaml":
|
||||||
|
proxy_base = str(request.base_url).rstrip("/")
|
||||||
|
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||||
|
data = data.replace(
|
||||||
|
base_url.encode(),
|
||||||
|
f"{proxy_base}/api/v1/remote/{remote_name}".encode(),
|
||||||
|
)
|
||||||
|
return data, "text/yaml"
|
||||||
return data, _get_content_type(filename)
|
return data, _get_content_type(filename)
|
||||||
|
|
||||||
|
|
||||||
@@ -445,7 +456,7 @@ async def get_artifact(request: Request, remote_name: str, path: str):
|
|||||||
try:
|
try:
|
||||||
artifact_data = storage.download_object(cached_key)
|
artifact_data = storage.download_object(cached_key)
|
||||||
filename = os.path.basename(path)
|
filename = os.path.basename(path)
|
||||||
artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request)
|
artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
|
||||||
|
|
||||||
logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})")
|
logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})")
|
||||||
|
|
||||||
@@ -486,7 +497,7 @@ async def get_artifact(request: Request, remote_name: str, path: str):
|
|||||||
cache_key = storage.get_object_key(remote_name, path)
|
cache_key = storage.get_object_key(remote_name, path)
|
||||||
artifact_data = storage.download_object(cache_key)
|
artifact_data = storage.download_object(cache_key)
|
||||||
filename = os.path.basename(path)
|
filename = os.path.basename(path)
|
||||||
artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request)
|
artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
|
||||||
|
|
||||||
metrics.record_cache_miss(remote_name, len(artifact_data))
|
metrics.record_cache_miss(remote_name, len(artifact_data))
|
||||||
cache_key = storage.get_object_key(remote_name, path)
|
cache_key = storage.get_object_key(remote_name, path)
|
||||||
|
|||||||
@@ -101,6 +101,13 @@ TEST_REMOTES = {
|
|||||||
"mutable_patterns": [r"^(?!.*\.tgz$).*"],
|
"mutable_patterns": [r"^(?!.*\.tgz$).*"],
|
||||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 600},
|
"cache": {"immutable_ttl": 0, "mutable_ttl": 600},
|
||||||
},
|
},
|
||||||
|
"helm-test": {
|
||||||
|
"base_url": "https://helm.releases.hashicorp.com",
|
||||||
|
"type": "remote",
|
||||||
|
"package": "helm",
|
||||||
|
"immutable_patterns": [r"\.tgz$"],
|
||||||
|
"cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -154,6 +154,20 @@ class TestGetMutablePatterns:
|
|||||||
assert any(re.search(p, "express") for p in patterns)
|
assert any(re.search(p, "express") for p in patterns)
|
||||||
assert any(re.search(p, "@babel/core") for p in patterns)
|
assert any(re.search(p, "@babel/core") for p in patterns)
|
||||||
|
|
||||||
|
def test_helm_returns_index_yaml_as_mutable(self, make_config):
    """A helm remote with no explicit patterns lists the index.yaml regex as mutable."""
    helm_remote = {
        "type": "remote",
        "package": "helm",
        "base_url": "https://helm.example.com",
    }
    mutable = make_config({"r": helm_remote}).get_mutable_patterns("r")
    assert r"index\.yaml$" in mutable
|
||||||
|
|
||||||
|
def test_helm_chart_tarballs_not_mutable_by_default(self, make_config):
    """No mutable pattern of a default helm remote matches a versioned chart tarball."""
    import re

    helm_remote = {
        "type": "remote",
        "package": "helm",
        "base_url": "https://helm.example.com",
    }
    mutable = make_config({"r": helm_remote}).get_mutable_patterns("r")

    # Only index.yaml is mutable; .tgz chart tarballs must not match any pattern.
    for tarball in ("vault-0.29.1.tgz", "consul-1.5.0.tgz"):
        assert all(re.search(regex, tarball) is None for regex in mutable)
|
||||||
|
|
||||||
def test_npm_explicit_mutable_pattern_excludes_tarballs(self, make_config):
|
def test_npm_explicit_mutable_pattern_excludes_tarballs(self, make_config):
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|||||||
@@ -841,3 +841,86 @@ class TestNpmRemote:
|
|||||||
response = client.get("/api/v1/remote/npm-test/express/-/express-4.18.2.tgz")
|
response = client.get("/api/v1/remote/npm-test/express/-/express-4.18.2.tgz")
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert "application/gzip" in response.headers["content-type"]
|
assert "application/gzip" in response.headers["content-type"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helm remote /api/v1/remote/helm-test/...
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestHelmRemote:
    """Endpoint tests for the ``helm-test`` remote under /api/v1/remote/helm-test/."""

    # Upstream-style index with a single chart URL on the remote's base_url.
    UPSTREAM_INDEX = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n"

    @staticmethod
    def _stub_cached_index(deps, body):
        """Prime the storage/cache doubles so *body* is a cached, still-valid mutable file."""
        storage = deps["storage"]
        cache = deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = body
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = True

    def test_index_yaml_is_mutable(self, client, patched_deps):
        """A cached, valid index.yaml is served with 200 and is not re-marked as cached."""
        self._stub_cached_index(patched_deps, self.UPSTREAM_INDEX)

        resp = client.get("/api/v1/remote/helm-test/index.yaml")

        assert resp.status_code == 200
        patched_deps["cache"].mark_index_cached.assert_not_called()

    def test_index_yaml_urls_rewritten_to_proxy(self, client, patched_deps):
        """Chart URLs on base_url inside a cached index.yaml come back pointing at the proxy."""
        self._stub_cached_index(patched_deps, self.UPSTREAM_INDEX)

        resp = client.get("/api/v1/remote/helm-test/index.yaml")

        assert resp.status_code == 200
        body = resp.content
        assert b"helm.releases.hashicorp.com" not in body
        assert b"/api/v1/remote/helm-test/vault-0.29.1.tgz" in body

    def test_index_yaml_content_type_is_yaml(self, client, patched_deps):
        """index.yaml responses carry a text/yaml content type."""
        self._stub_cached_index(patched_deps, b"apiVersion: v1\nentries: {}\n")

        resp = client.get("/api/v1/remote/helm-test/index.yaml")

        assert resp.status_code == 200
        assert "text/yaml" in resp.headers["content-type"]

    def test_chart_tarball_immutable_returns_gzip_content_type(self, client, patched_deps):
        """A cached, non-mutable .tgz chart is served from cache as application/gzip."""
        storage = patched_deps["storage"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"\x1f\x8b chart bytes"
        patched_deps["cache"].is_mutable_file.return_value = False

        resp = client.get("/api/v1/remote/helm-test/vault-0.29.1.tgz")

        assert resp.status_code == 200
        assert "application/gzip" in resp.headers["content-type"]
        assert resp.headers["X-Artifact-Source"] == "cache"

    def test_index_yaml_cache_miss_fetches_upstream(self, client, patched_deps):
        """An uncached index.yaml triggers exactly one upstream fetch and is still rewritten."""
        storage = patched_deps["storage"]
        storage.exists.return_value = False
        storage.download_object.return_value = self.UPSTREAM_INDEX
        patched_deps["cache"].is_mutable_file.return_value = True

        fetch_patch = patch(
            "artifactapi.main.cache_single_artifact",
            new_callable=AsyncMock,
            return_value={"status": "cached"},
        )
        with fetch_patch as mock_fetch:
            resp = client.get("/api/v1/remote/helm-test/index.yaml")

        mock_fetch.assert_called_once()
        assert resp.status_code == 200
        assert b"helm.releases.hashicorp.com" not in resp.content

    def test_non_tgz_non_yaml_path_blocked_by_pattern(self, client, patched_deps):
        """A path that is neither mutable nor matched by immutable_patterns gets a 403."""
        patched_deps["cache"].is_mutable_file.return_value = False

        resp = client.get("/api/v1/remote/helm-test/vault.zip")

        assert resp.status_code == 403
|
||||||
|
|||||||
Reference in New Issue
Block a user