fix: rewrite helm index.yaml URLs post-parse to handle relative URLs (#37)

Closes #33

## Summary

- `_merge_helm_indexes` now parses each member's raw YAML first, then rewrites `urls` entries in-place via the new `_rewrite_urls` helper
- **Relative URLs** (e.g. `rancher-2.13.1.tgz`) are prepended with `{proxy_base}/api/v1/remote/{member_name}/`
- **Absolute URLs** matching `base_url` are rewritten to the proxy path (existing behaviour, now correct)
- **Absolute URLs** with a different prefix are left unchanged
- Removes the `_helm.resolve_content` raw-bytes detour from the virtual merge path; `remote/helm.py` is unchanged (still used for direct remote proxying)

## Test plan

- [x] 278 unit tests pass (`make test`)
- [x] New `TestRewriteUrls` class covering relative, absolute-match, absolute-no-match, leading-slash, and multi-URL cases
- [x] New `test_relative_urls_rewritten_to_proxy` in `TestMergeHelmIndexes`
- [x] Updated `test_first_member_wins_on_duplicate_name_and_version` to assert on proxy remote name (not upstream hostname)
- [x] Live Docker test: Rancher `index.yaml` relative URLs rewritten correctly to `http://localhost:8000/api/v1/remote/rancher-stable/rancher-2.14.1.tgz` etc.
- [x] `helm-all` virtual (19 members) returns HTTP 200 with 395k-line merged index on cache miss

Reviewed-on: #37
This commit was merged in pull request #37.
This commit is contained in:
2026-05-02 01:22:16 +10:00
parent 1656664dfa
commit 624d858062
2 changed files with 89 additions and 42 deletions
+21 -4
View File
@@ -9,8 +9,6 @@ import httpx
import yaml import yaml
from fastapi import HTTPException, Request, Response from fastapi import HTTPException, Request, Response
from ..remote import helm as _helm
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -86,6 +84,19 @@ async def _get_member_index(
return member_name, member_cfg, member_ttl, raw_data return member_name, member_cfg, member_ttl, raw_data
def _rewrite_urls(urls: list, base_url: str, proxy_base: str, member_name: str) -> list:
proxy_remote = f"{proxy_base}/api/v1/remote/{member_name}"
rewritten = []
for url in urls:
if url.startswith(("http://", "https://")):
if base_url and url.startswith(base_url):
url = proxy_remote + url[len(base_url) :]
else:
url = f"{proxy_remote}/{url.lstrip('/')}"
rewritten.append(url)
return rewritten
def _merge_helm_indexes(raw_indexes: list[bytes], member_names: list[str], member_configs: list[dict], proxy_base: str) -> bytes: def _merge_helm_indexes(raw_indexes: list[bytes], member_names: list[str], member_configs: list[dict], proxy_base: str) -> bytes:
"""Merge helm index.yaml files with per-member URL rewriting. """Merge helm index.yaml files with per-member URL rewriting.
@@ -96,15 +107,21 @@ def _merge_helm_indexes(raw_indexes: list[bytes], member_names: list[str], membe
for raw_data, member_name, member_cfg in zip(raw_indexes, member_names, member_configs): for raw_data, member_name, member_cfg in zip(raw_indexes, member_names, member_configs):
base_url = member_cfg.get("base_url", "").rstrip("/") base_url = member_cfg.get("base_url", "").rstrip("/")
rewritten, _ = _helm.resolve_content(raw_data, "index.yaml", "index.yaml", base_url, proxy_base, member_name)
try: try:
index = yaml.safe_load(rewritten) index = yaml.safe_load(raw_data)
except Exception as e: except Exception as e:
logger.warning(f"Virtual: failed to parse index.yaml from member '{member_name}': {e}") logger.warning(f"Virtual: failed to parse index.yaml from member '{member_name}': {e}")
continue continue
for chart_name, versions in (index.get("entries") or {}).items(): for chart_name, versions in (index.get("entries") or {}).items():
for version_entry in versions:
version_entry["urls"] = _rewrite_urls(
version_entry.get("urls") or [],
base_url,
proxy_base,
member_name,
)
if chart_name not in merged_entries: if chart_name not in merged_entries:
merged_entries[chart_name] = list(versions) merged_entries[chart_name] = list(versions)
else: else:
+68 -38
View File
@@ -12,6 +12,7 @@ from artifactapi.artifact.virtual import (
_HelmDumper, _HelmDumper,
_HelmHandler, _HelmHandler,
_merge_helm_indexes, _merge_helm_indexes,
_rewrite_urls,
_VirtualHandler, _VirtualHandler,
) )
@@ -66,14 +67,21 @@ entries:
generated: "2023-01-01T00:00:00.000Z" generated: "2023-01-01T00:00:00.000Z"
""" """
_INDEX_RELATIVE = b"""\
apiVersion: v1
entries:
rancher:
- name: rancher
version: "2.13.1"
urls:
- rancher-2.13.1.tgz
generated: "2023-01-01T00:00:00.000Z"
"""
_CFG_A = {"base_url": "https://helm.releases.hashicorp.com", "cache": {"mutable_ttl": 3600}} _CFG_A = {"base_url": "https://helm.releases.hashicorp.com", "cache": {"mutable_ttl": 3600}}
_CFG_B = {"base_url": "https://charts.example.com", "cache": {"mutable_ttl": 1800}} _CFG_B = {"base_url": "https://charts.example.com", "cache": {"mutable_ttl": 1800}}
def _identity_resolve(data, *args, **kwargs):
return data, None
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# _HelmDumper — datetime/date YAML serialization # _HelmDumper — datetime/date YAML serialization
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -135,8 +143,7 @@ class TestHelmHandler:
assert isinstance(msg, str) and len(msg) > 0 assert isinstance(msg, str) and len(msg) > 0
def test_merge_returns_bytes(self): def test_merge_returns_bytes(self):
with patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve): result = self.handler.merge([_INDEX_A], ["member-a"], [_CFG_A], "http://proxy.example.com")
result = self.handler.merge([_INDEX_A], ["member-a"], [_CFG_A], "http://proxy.example.com")
assert isinstance(result, bytes) assert isinstance(result, bytes)
def test_merge_delegates_to_merge_helm_indexes(self): def test_merge_delegates_to_merge_helm_indexes(self):
@@ -160,6 +167,41 @@ class TestHandlersRegistry:
assert isinstance(_HANDLERS["helm"], _VirtualHandler) assert isinstance(_HANDLERS["helm"], _VirtualHandler)
# ---------------------------------------------------------------------------
# _rewrite_urls
# ---------------------------------------------------------------------------
class TestRewriteUrls:
def _rewrite(self, urls, base_url="https://upstream.example.com", proxy_base="http://proxy.example.com", member_name="my-remote"):
return _rewrite_urls(urls, base_url, proxy_base, member_name)
def test_absolute_url_matching_base_is_rewritten(self):
result = self._rewrite(["https://upstream.example.com/chart-1.0.0.tgz"])
assert result == ["http://proxy.example.com/api/v1/remote/my-remote/chart-1.0.0.tgz"]
def test_relative_url_is_prepended_with_proxy_remote(self):
result = self._rewrite(["chart-1.0.0.tgz"])
assert result == ["http://proxy.example.com/api/v1/remote/my-remote/chart-1.0.0.tgz"]
def test_relative_url_with_leading_slash(self):
result = self._rewrite(["/chart-1.0.0.tgz"])
assert result == ["http://proxy.example.com/api/v1/remote/my-remote/chart-1.0.0.tgz"]
def test_absolute_url_not_matching_base_is_unchanged(self):
result = self._rewrite(["https://other.example.com/chart-1.0.0.tgz"])
assert result == ["https://other.example.com/chart-1.0.0.tgz"]
def test_empty_url_list_returns_empty(self):
assert self._rewrite([]) == []
def test_multiple_urls_all_rewritten(self):
urls = ["https://upstream.example.com/a-1.0.0.tgz", "b-2.0.0.tgz"]
result = self._rewrite(urls)
assert result[0] == "http://proxy.example.com/api/v1/remote/my-remote/a-1.0.0.tgz"
assert result[1] == "http://proxy.example.com/api/v1/remote/my-remote/b-2.0.0.tgz"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# _merge_helm_indexes # _merge_helm_indexes
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -167,8 +209,7 @@ class TestHandlersRegistry:
class TestMergeHelmIndexes: class TestMergeHelmIndexes:
def _merge(self, raw_indexes, member_names, member_configs, proxy_base="http://proxy.example.com"): def _merge(self, raw_indexes, member_names, member_configs, proxy_base="http://proxy.example.com"):
with patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve): return _merge_helm_indexes(raw_indexes, member_names, member_configs, proxy_base)
return _merge_helm_indexes(raw_indexes, member_names, member_configs, proxy_base)
def _parse(self, raw): def _parse(self, raw):
return yaml.safe_load(raw) return yaml.safe_load(raw)
@@ -187,7 +228,18 @@ class TestMergeHelmIndexes:
def test_first_member_wins_on_duplicate_name_and_version(self): def test_first_member_wins_on_duplicate_name_and_version(self):
index = self._parse(self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B])) index = self._parse(self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B]))
v027 = next(e for e in index["entries"]["vault"] if e["version"] == "0.27.0") v027 = next(e for e in index["entries"]["vault"] if e["version"] == "0.27.0")
assert "helm.releases.hashicorp.com" in v027["urls"][0] assert "member-a" in v027["urls"][0]
def test_absolute_urls_rewritten_to_proxy(self):
index = self._parse(self._merge([_INDEX_A], ["member-a"], [_CFG_A]))
url = index["entries"]["vault"][0]["urls"][0]
assert url == "http://proxy.example.com/api/v1/remote/member-a/vault-0.27.0.tgz"
def test_relative_urls_rewritten_to_proxy(self):
cfg = {"base_url": "https://releases.rancher.com/server-charts/stable", "cache": {"mutable_ttl": 3600}}
index = self._parse(self._merge([_INDEX_RELATIVE], ["rancher-stable"], [cfg]))
url = index["entries"]["rancher"][0]["urls"][0]
assert url == "http://proxy.example.com/api/v1/remote/rancher-stable/rancher-2.13.1.tgz"
def test_different_versions_of_same_chart_both_included(self): def test_different_versions_of_same_chart_both_included(self):
index = self._parse(self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B])) index = self._parse(self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B]))
@@ -484,10 +536,7 @@ class TestVirtualRoute:
mock_get.assert_not_called() mock_get.assert_not_called()
def test_cache_miss_returns_200_with_yaml_content_type(self, client, patched_virtual_deps): def test_cache_miss_returns_200_with_yaml_content_type(self, client, patched_virtual_deps):
with ( with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get,
patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve),
):
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
@@ -495,10 +544,7 @@ class TestVirtualRoute:
assert "text/yaml" in response.headers["content-type"] assert "text/yaml" in response.headers["content-type"]
def test_cache_miss_response_contains_merged_entries(self, client, patched_virtual_deps): def test_cache_miss_response_contains_merged_entries(self, client, patched_virtual_deps):
with ( with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get,
patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve),
):
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
@@ -507,10 +553,7 @@ class TestVirtualRoute:
def test_cache_miss_stores_result_in_s3(self, client, patched_virtual_deps): def test_cache_miss_stores_result_in_s3(self, client, patched_virtual_deps):
deps = patched_virtual_deps deps = patched_virtual_deps
with ( with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get,
patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve),
):
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
client.get("/api/v1/virtual/helm-virtual-test/index.yaml") client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
@@ -518,10 +561,7 @@ class TestVirtualRoute:
def test_cache_miss_marks_index_cached(self, client, patched_virtual_deps): def test_cache_miss_marks_index_cached(self, client, patched_virtual_deps):
deps = patched_virtual_deps deps = patched_virtual_deps
with ( with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get,
patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve),
):
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
client.get("/api/v1/virtual/helm-virtual-test/index.yaml") client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
@@ -529,10 +569,7 @@ class TestVirtualRoute:
def test_cache_miss_uses_min_ttl_across_members(self, client, patched_virtual_deps): def test_cache_miss_uses_min_ttl_across_members(self, client, patched_virtual_deps):
deps = patched_virtual_deps deps = patched_virtual_deps
with ( with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get,
patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve),
):
mock_get.side_effect = [ mock_get.side_effect = [
("helm-test", _CFG_A, 3600, _INDEX_SIMPLE), ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE),
("helm-member-2", _CFG_B, 1800, _INDEX_SIMPLE), ("helm-member-2", _CFG_B, 1800, _INDEX_SIMPLE),
@@ -550,10 +587,7 @@ class TestVirtualRoute:
assert response.status_code == 502 assert response.status_code == 502
def test_one_member_unreachable_still_returns_200(self, client, patched_virtual_deps): def test_one_member_unreachable_still_returns_200(self, client, patched_virtual_deps):
with ( with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get,
patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve),
):
mock_get.side_effect = [ mock_get.side_effect = [
("helm-test", _CFG_A, 3600, _INDEX_SIMPLE), ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE),
("helm-member-2", _CFG_B, 1800, None), ("helm-member-2", _CFG_B, 1800, None),
@@ -572,7 +606,6 @@ class TestVirtualRoute:
with ( with (
patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get, patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get,
patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve),
patch.object(main_mod.config, "get_remote_config", side_effect=patched_get), patch.object(main_mod.config, "get_remote_config", side_effect=patched_get),
): ):
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
@@ -586,10 +619,7 @@ class TestVirtualRoute:
deps = patched_virtual_deps deps = patched_virtual_deps
deps["storage"].upload.side_effect = Exception("S3 write error") deps["storage"].upload.side_effect = Exception("S3 write error")
with ( with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get,
patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve),
):
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml")