From c7baae8d0d3002ace87411da219507bda5c7f84a Mon Sep 17 00:00:00 2001 From: Ben Vincent Date: Wed, 29 Apr 2026 23:01:14 +1000 Subject: [PATCH] feat: add virtual repository support for unified index merging (#30) Adds a new virtual repo type that merges indexes from multiple member remotes of the same package type. Currently supports helm (index.yaml merge with URL rewriting). Member fetches run in parallel; merged index is Redis-cached at min(mutable_ttl) across members. Reviewed-on: https://git.unkin.net/unkin/artifactapi/pulls/30 --- README.md | 68 +++- examples/single-file/remotes.yaml | 241 +++++++++++ src/artifactapi/artifact/virtual.py | 227 +++++++++++ src/artifactapi/main.py | 7 +- tests/conftest.py | 22 + tests/test_virtual.py | 596 ++++++++++++++++++++++++++++ 6 files changed, 1159 insertions(+), 2 deletions(-) create mode 100644 src/artifactapi/artifact/virtual.py create mode 100644 tests/test_virtual.py diff --git a/README.md b/README.md index 590d9ce..03ca7af 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ FastAPI caching proxy that downloads and stores files from remote sources in S3- ## Features - Remote definitions via `remotes.yaml` — generic HTTP, Alpine APK, RPM, Docker, PyPI, npm, Helm +- Virtual repositories — merge multiple remotes of the same package type into a single unified index - Immutable/mutable caching model with per-remote TTLs - Conditional revalidation (`If-None-Match` / `If-Modified-Since`) on TTL expiry - Stale-on-upstream-error: refreshes TTL when backend is unreachable rather than evicting @@ -37,6 +38,7 @@ src/artifactapi/ ├── docker_auth.py — backwards-compat shim → auth/docker.py ├── artifact/ — route handler implementations │ ├── proxy.py — GET /api/v1/remote (remote proxy, cache, revalidation) +│ ├── virtual.py — GET /api/v1/virtual (virtual repo index merging) │ ├── local.py — PUT/HEAD/DELETE /api/v1/remote (local repos) │ ├── docker.py — /v2/ Docker Registry v2 proxy │ ├── discovery.py — /api/v1/artifacts discovery + bulk cache @@ -71,6 +73,7 @@ src/artifactapi/ | `PUT` | `/api/v1/remote/{remote}/{path}` | Upload to local remote | | `HEAD` | `/api/v1/remote/{remote}/{path}` | Check existence (local remotes) | | `DELETE` | `/api/v1/remote/{remote}/{path}` | Delete from local remote | +| `GET` | `/api/v1/virtual/{virtual}/{path}` | Fetch from virtual (merged) repository | | `GET` | `/v2/{remote}/{path}` | Docker Registry v2 proxy | | `PUT` | `/cache/flush` | Flush cache entries | | `GET` | `/health` | Health check | @@ -123,7 +126,7 @@ remotes: {} # optional base remotes remotes: remote-name: base_url: "https://example.com" - type: "remote" # "remote" or "local" + type: "remote" # "remote", "local", or "virtual" package: "generic" # generic, alpine, rpm, docker, pypi, npm, helm description: "..." immutable_patterns: # regex — cached forever @@ -330,6 +333,69 @@ helm repo add hashicorp https://artifacts.example.com/api/v1/remote/hashicorp-he helm repo update ``` +### virtual + +A virtual repository presents a single unified index built from multiple member remotes of the same package type. Clients configure one endpoint and get access to all member remotes transparently. + +All members must share the same `package` type as the virtual repo. Currently supported package types: `helm`. + +```yaml +remotes: + helm-hashicorp: + base_url: "https://helm.releases.hashicorp.com" + type: "remote" + package: "helm" + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + helm-bitnami: + base_url: "https://charts.bitnami.com/bitnami" + type: "remote" + package: "helm" + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + helm-all: + type: "virtual" + package: "helm" + members: + - helm-hashicorp # listed first = highest priority + - helm-bitnami +``` + +**How it works:** + +1. A request for the package index triggers a parallel fetch of each member's index from S3 cache, falling back to upstream if not yet cached. +2. Member indexes are merged into a single index with URL rewriting so artifact download URLs continue to resolve through the individual member remote. +3. The merged index is cached in Redis with a TTL equal to the minimum `mutable_ttl` across all members. + +**Priority / conflict resolution:** + +When the same artifact name and version appears in more than one member, the member listed **first** in `members` wins. Subsequent members contribute only artifacts not already present. + +**Partial failures:** + +If a member is unreachable and has no cached index, it is skipped and a warning is logged. The merged index is still served from available members. If *no* members can be reached, the request returns `502`. + +**Caching:** + +The merged index is cached using `min(mutable_ttl)` across all members. Each member's raw index is cached in S3 under its own remote key by the normal proxy rules; the virtual handler reuses those copies when available. + +**Helm example:** + +```bash +helm repo add all https://artifacts.example.com/api/v1/virtual/helm-all +helm repo update +``` + +Chart tarball URLs in the merged `index.yaml` are rewritten to point at the individual member remote (e.g. `…/api/v1/remote/helm-hashicorp/vault-0.27.0.tgz`), so downloads bypass the virtual endpoint entirely. + ### local ```yaml diff --git a/examples/single-file/remotes.yaml b/examples/single-file/remotes.yaml index a82b676..6e36b5b 100644 --- a/examples/single-file/remotes.yaml +++ b/examples/single-file/remotes.yaml @@ -268,6 +268,247 @@ remotes: immutable_ttl: 0 # Chart tarballs are versioned — cache forever mutable_ttl: 3600 # index.yaml refreshed after 1 hour + metallb: + base_url: "https://metallb.github.io/metallb" + type: "remote" + package: "helm" + description: "MetalLB load balancer Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + jetstack: + base_url: "https://charts.jetstack.io" + type: "remote" + package: "helm" + description: "Jetstack Helm charts (cert-manager)" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + rancher-stable: + base_url: "https://releases.rancher.com/server-charts/stable" + type: "remote" + package: "helm" + description: "Rancher stable Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + purelb: + base_url: "https://gitlab.com/api/v4/projects/20400619/packages/helm/stable" + type: "remote" + package: "helm" + description: "PureLB load balancer Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + istio: + base_url: "https://istio-release.storage.googleapis.com/charts" + type: "remote" + package: "helm" + description: "Istio service mesh Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + cnpg: + base_url: "https://cloudnative-pg.github.io/charts" + type: "remote" + package: "helm" + description: "CloudNativePG operator Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + ceph-csi: + base_url: "https://ceph.github.io/csi-charts" + type: "remote" + package: "helm" + description: "Ceph CSI driver Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + external-dns: + base_url: "https://kubernetes-sigs.github.io/external-dns/" + type: "remote" + package: "helm" + description: "ExternalDNS Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + intel-helm: + base_url: "https://intel.github.io/helm-charts/" + type: "remote" + package: "helm" + description: "Intel Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + elastic: + base_url: "https://helm.elastic.co" + type: "remote" + package: "helm" + description: "Elastic stack Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + k8up-io: + base_url: "https://k8up-io.github.io/k8up" + type: "remote" + package: "helm" + description: "K8up backup operator Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + victoriametrics: + base_url: "https://victoriametrics.github.io/helm-charts/" + type: "remote" + package: "helm" + description: "VictoriaMetrics observability Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + grafana: + base_url: "https://grafana.github.io/helm-charts" + type: "remote" + package: "helm" + description: "Grafana observability Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + helm-openldap: + base_url: "https://jp-gouin.github.io/helm-openldap/" + type: "remote" + package: "helm" + description: "OpenLDAP Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + woodpecker: + base_url: "https://woodpecker-ci.org/" + type: "remote" + package: "helm" + description: "Woodpecker CI Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + stakater: + base_url: "https://stakater.github.io/stakater-charts" + type: "remote" + package: "helm" + description: "Stakater Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + jfrog: + base_url: "https://charts.jfrog.io/" + type: "remote" + package: "helm" + description: "JFrog Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + openvox: + base_url: "https://openvoxproject.github.io/openvox-helm-chart" + type: "remote" + package: "helm" + description: "OpenVox Helm charts" + check_mutable_updates: true + immutable_patterns: + - "\\.tgz$" + cache: + immutable_ttl: 0 + mutable_ttl: 3600 + + helm-all: + type: "virtual" + package: "helm" + description: "Virtual repository merging all helm remotes — member order is priority order for duplicate chart+version" + members: + - hashicorp-helm + - metallb + - jetstack + - rancher-stable + - purelb + - istio + - cnpg + - ceph-csi + - external-dns + - intel-helm + - elastic + - k8up-io + - victoriametrics + - grafana + - helm-openldap + - woodpecker + - stakater + - jfrog + - openvox + local-generic: type: "local" package: "generic" diff --git a/src/artifactapi/artifact/virtual.py b/src/artifactapi/artifact/virtual.py new file mode 100644 index 0000000..0bae5e6 --- /dev/null +++ b/src/artifactapi/artifact/virtual.py @@ -0,0 +1,227 @@ +import asyncio +import base64 +import logging +import time +from datetime import UTC, date, datetime +from typing import Protocol, runtime_checkable + +import httpx +import yaml +from fastapi import HTTPException, Request, Response + +from ..remote import helm as _helm + +logger = logging.getLogger(__name__) + + +class _HelmDumper(yaml.Dumper): + """YAML dumper that serializes datetime/date objects back to ISO 8601 strings. + + yaml.safe_load converts timestamp-shaped YAML scalars (e.g. chart `created` + fields) to Python datetime objects. Without a custom representer, yaml.dump + would render them as "2022-12-16 11:08:49+00:00" (space, not T), which + Go's YAML parser cannot unmarshal into time.Time. + """ + + +def _repr_datetime(dumper: yaml.Dumper, data: datetime) -> yaml.ScalarNode: + s = data.strftime("%Y-%m-%dT%H:%M:%S.%f") + ("Z" if data.tzinfo else "") + return dumper.represent_scalar("tag:yaml.org,2002:str", s) + + +def _repr_date(dumper: yaml.Dumper, data: date) -> yaml.ScalarNode: + return dumper.represent_scalar("tag:yaml.org,2002:str", data.isoformat()) + + +_HelmDumper.add_representer(datetime, _repr_datetime) +_HelmDumper.add_representer(date, _repr_date) + + +async def _get_member_index( + member_name: str, + member_cfg: dict, + path: str, + storage, + cache, +) -> tuple[str, dict, int, bytes | None]: + """Fetch or retrieve cached index.yaml for one member remote. + + Returns (member_name, member_cfg, ttl, raw_bytes). + raw_bytes is None if the member is unreachable and not in S3. + """ + member_ttl = member_cfg.get("cache", {}).get("mutable_ttl", 3600) + s3_key = storage.get_object_key(member_name, path) + raw_data: bytes | None = None + + if storage.exists(s3_key) and cache.is_index_valid(member_name, path): + try: + raw_data = storage.download_object(s3_key) + logger.info(f"Virtual: cache hit for member '{member_name}'") + except Exception: + raw_data = None + + if raw_data is None: + base_url = member_cfg.get("base_url", "").rstrip("/") + upstream_url = f"{base_url}/index.yaml" + headers = {} + username = member_cfg.get("username") + password = member_cfg.get("password") + if username and password: + token = base64.b64encode(f"{username}:{password}".encode()).decode() + headers["Authorization"] = f"Basic {token}" + try: + async with httpx.AsyncClient(follow_redirects=True) as client: + response = await client.get(upstream_url, headers=headers, timeout=30.0) + response.raise_for_status() + raw_data = response.content + except Exception as e: + logger.warning(f"Virtual: failed to fetch index.yaml from member '{member_name}': {e}") + return member_name, member_cfg, member_ttl, None + try: + storage.upload(s3_key, raw_data) + cache.mark_index_cached(member_name, path, member_ttl) + except Exception as e: + logger.warning(f"Virtual: failed to cache index.yaml for member '{member_name}': {e}") + + return member_name, member_cfg, member_ttl, raw_data + + +def _merge_helm_indexes(raw_indexes: list[bytes], member_names: list[str], member_configs: list[dict], proxy_base: str) -> bytes: + """Merge helm index.yaml files with per-member URL rewriting. + + Priority is determined by position in member_names: earlier members win + when the same chart name + version appears in multiple remotes. + """ + merged_entries: dict[str, list] = {} + + for raw_data, member_name, member_cfg in zip(raw_indexes, member_names, member_configs): + base_url = member_cfg.get("base_url", "").rstrip("/") + rewritten, _ = _helm.resolve_content(raw_data, "index.yaml", "index.yaml", base_url, proxy_base, member_name) + + try: + index = yaml.safe_load(rewritten) + except Exception as e: + logger.warning(f"Virtual: failed to parse index.yaml from member '{member_name}': {e}") + continue + + for chart_name, versions in (index.get("entries") or {}).items(): + if chart_name not in merged_entries: + merged_entries[chart_name] = list(versions) + else: + existing = {(v.get("name"), v.get("version")) for v in merged_entries[chart_name]} + for version_entry in versions: + key = (version_entry.get("name"), version_entry.get("version")) + if key not in existing: + merged_entries[chart_name].append(version_entry) + existing.add(key) + + merged = { + "apiVersion": "v1", + "entries": merged_entries, + "generated": datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%S.000Z"), + } + return yaml.dump(merged, Dumper=_HelmDumper, default_flow_style=False, allow_unicode=True).encode() + + +@runtime_checkable +class _VirtualHandler(Protocol): + def accepts_path(self, path: str) -> bool: ... + def merge(self, raw_indexes: list[bytes], member_names: list[str], member_configs: list[dict], proxy_base: str) -> bytes: ... + def path_error(self) -> str: ... + + +class _HelmHandler: + def accepts_path(self, path: str) -> bool: + return path == "index.yaml" + + def merge(self, raw_indexes: list[bytes], member_names: list[str], member_configs: list[dict], proxy_base: str) -> bytes: + return _merge_helm_indexes(raw_indexes, member_names, member_configs, proxy_base) + + def path_error(self) -> str: + return "Virtual helm repositories only serve index.yaml; chart tarballs are served directly by member remotes" + + +_HANDLERS: dict[str, _VirtualHandler] = { + "helm": _HelmHandler(), +} + + +async def handle(request: Request, virtual_name: str, path: str, storage, cache, config) -> Response: + virtual_cfg = config.get_remote_config(virtual_name) + if not virtual_cfg: + raise HTTPException(status_code=404, detail=f"Virtual repository '{virtual_name}' not configured") + if virtual_cfg.get("type") != "virtual": + raise HTTPException(status_code=400, detail=f"'{virtual_name}' is not a virtual repository") + + package = virtual_cfg.get("package") + handler = _HANDLERS.get(package) + if handler is None: + raise HTTPException(status_code=400, detail=f"Virtual repositories with package '{package}' are not yet supported") + + if not handler.accepts_path(path): + raise HTTPException(status_code=404, detail=handler.path_error()) + + members = virtual_cfg.get("members", []) + if not members: + raise HTTPException(status_code=500, detail=f"Virtual repository '{virtual_name}' has no members configured") + + virtual_key = storage.get_object_key(virtual_name, path) + + if cache.is_index_valid(virtual_name, path) and storage.exists(virtual_key): + data = storage.download_object(virtual_key) + logger.info(f"Virtual HIT: {virtual_name}/{path}") + return Response(content=data, media_type="text/yaml") + + # Resolve configs first (config reads are sync/cheap) + member_entries = [] + for member_name in members: + member_cfg = config.get_remote_config(member_name) + if not member_cfg: + logger.warning(f"Virtual '{virtual_name}': member '{member_name}' not found in config, skipping") + continue + member_entries.append((member_name, member_cfg)) + + # Fetch all member indexes in parallel; asyncio.gather preserves input order + proxy_base = str(request.base_url).rstrip("/") + t_fetch = time.perf_counter() + results = await asyncio.gather(*[_get_member_index(name, cfg, path, storage, cache) for name, cfg in member_entries]) + fetch_ms = int((time.perf_counter() - t_fetch) * 1000) + + raw_indexes: list[bytes] = [] + used_members: list[str] = [] + used_configs: list[dict] = [] + min_ttl: int | None = None + + for member_name, member_cfg, member_ttl, raw_data in results: + if min_ttl is None or member_ttl < min_ttl: + min_ttl = member_ttl + if raw_data is None: + logger.warning(f"Virtual '{virtual_name}': skipping unreachable member '{member_name}'") + continue + raw_indexes.append(raw_data) + used_members.append(member_name) + used_configs.append(member_cfg) + + if not raw_indexes: + raise HTTPException(status_code=502, detail=f"Virtual repository '{virtual_name}': no member indices could be fetched") + + if min_ttl is None: + min_ttl = 3600 + + t_merge = time.perf_counter() + merged = handler.merge(raw_indexes, used_members, used_configs, proxy_base) + merge_ms = int((time.perf_counter() - t_merge) * 1000) + + try: + t_store = time.perf_counter() + storage.upload(virtual_key, merged) + cache.mark_index_cached(virtual_name, path, min_ttl) + store_ms = int((time.perf_counter() - t_store) * 1000) + logger.info( + f"Virtual MISS: {virtual_name}/{path} rebuilt from {used_members} " + f"(fetch={fetch_ms}ms merge={merge_ms}ms store={store_ms}ms ttl={min_ttl}s)" + ) + except Exception as e: + logger.warning(f"Virtual: failed to store merged index for '{virtual_name}': {e}") + + return Response(content=merged, media_type="text/yaml") diff --git a/src/artifactapi/main.py b/src/artifactapi/main.py index 1f99e96..e9e42ee 100644 --- a/src/artifactapi/main.py +++ b/src/artifactapi/main.py @@ -13,7 +13,7 @@ try: except ImportError: __version__ = "dev" -from .artifact import discovery, flush, local, proxy +from .artifact import discovery, flush, local, proxy, virtual from .artifact import docker as docker_handler from .cache import RedisCache from .config import ConfigManager @@ -89,6 +89,11 @@ async def docker_v2_proxy(request: Request, remote_name: str, path: str): return await docker_handler.proxy(request, remote_name, path, storage, cache, config, metrics) +@app.get("/api/v1/virtual/{virtual_name}/{path:path}") +async def get_virtual_artifact(request: Request, virtual_name: str, path: str): + return await virtual.handle(request, virtual_name, path, storage, cache, config) + + @app.get("/api/v1/remote/{remote_name}/{path:path}") async def get_artifact(request: Request, remote_name: str, path: str): return await proxy.handle(request, remote_name, path, storage, cache, config, database, metrics) diff --git a/tests/conftest.py b/tests/conftest.py index 1178062..2e23c3a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -116,6 +116,28 @@ TEST_REMOTES = { "quarantine_days": 3, "cache": {"immutable_ttl": 0, "mutable_ttl": 0}, }, + "helm-member-2": { + "base_url": "https://charts.example.com", + "type": "remote", + "package": "helm", + "immutable_patterns": [r"\.tgz$"], + "cache": {"immutable_ttl": 0, "mutable_ttl": 1800}, + }, + "helm-virtual-test": { + "type": "virtual", + "package": "helm", + "members": ["helm-test", "helm-member-2"], + }, + "unsupported-virtual-test": { + "type": "virtual", + "package": "rpm", + "members": ["rpm-test"], + }, + "empty-virtual-test": { + "type": "virtual", + "package": "helm", + "members": [], + }, } } diff --git a/tests/test_virtual.py b/tests/test_virtual.py new file mode 100644 index 0000000..5094e2d --- /dev/null +++ b/tests/test_virtual.py @@ -0,0 +1,596 @@ +"""Unit tests for the virtual repository handler (artifact/virtual.py).""" + +from datetime import UTC, date, datetime +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import yaml + +from artifactapi.artifact.virtual import ( + _HANDLERS, + _get_member_index, + _HelmDumper, + _HelmHandler, + _merge_helm_indexes, + _VirtualHandler, +) + +# --------------------------------------------------------------------------- +# Shared sample data +# --------------------------------------------------------------------------- + +_INDEX_A = b"""\ +apiVersion: v1 +entries: + vault: + - name: vault + version: "0.27.0" + urls: + - https://helm.releases.hashicorp.com/vault-0.27.0.tgz + consul: + - name: consul + version: "1.2.0" + urls: + - https://helm.releases.hashicorp.com/consul-1.2.0.tgz +generated: "2023-01-01T00:00:00.000Z" +""" + +_INDEX_B = b"""\ +apiVersion: v1 +entries: + nginx: + - name: nginx + version: "15.0.0" + urls: + - https://charts.example.com/nginx-15.0.0.tgz + vault: + - name: vault + version: "0.27.0" + urls: + - https://charts.example.com/vault-0.27.0.tgz + - name: vault + version: "0.26.0" + urls: + - https://charts.example.com/vault-0.26.0.tgz +generated: "2023-01-01T00:00:00.000Z" +""" + +_INDEX_SIMPLE = b"""\ +apiVersion: v1 +entries: + mychart: + - name: mychart + version: "1.0.0" + urls: + - https://helm.releases.hashicorp.com/mychart-1.0.0.tgz +generated: "2023-01-01T00:00:00.000Z" +""" + +_CFG_A = {"base_url": "https://helm.releases.hashicorp.com", "cache": {"mutable_ttl": 3600}} +_CFG_B = {"base_url": "https://charts.example.com", "cache": {"mutable_ttl": 1800}} + + +def _identity_resolve(data, *args, **kwargs): + return data, None + + +# --------------------------------------------------------------------------- +# _HelmDumper — datetime/date YAML serialization +# --------------------------------------------------------------------------- + + +class TestHelmDumper: + def _dump(self, value): + return yaml.dump({"v": value}, Dumper=_HelmDumper) + + def test_datetime_with_tz_includes_Z_suffix(self): + dt = datetime(2023, 6, 15, 12, 0, 0, tzinfo=UTC) + assert "Z" in self._dump(dt) + + def test_datetime_without_tz_has_no_Z_suffix(self): + dt = datetime(2023, 6, 15, 12, 0, 0) + assert "Z" not in self._dump(dt) + + def test_datetime_uses_T_separator_not_space(self): + dt = datetime(2023, 6, 15, 12, 30, 0, tzinfo=UTC) + assert "T12:30:00" in self._dump(dt) + + def test_date_serialized_as_iso_string(self): + assert "2023-01-15" in self._dump(date(2023, 1, 15)) + + def test_datetime_round_trips_as_string_not_python_datetime(self): + dt = datetime(2023, 6, 15, 12, 0, 0, tzinfo=UTC) + parsed = yaml.safe_load(self._dump(dt)) + # yaml.safe_load must not re-parse this as a datetime object + assert isinstance(parsed["v"], str) + + def test_date_round_trips_as_string_not_python_date(self): + parsed = yaml.safe_load(self._dump(date(2023, 1, 15))) + assert isinstance(parsed["v"], str) + + +# --------------------------------------------------------------------------- +# _HelmHandler +# --------------------------------------------------------------------------- + + +class TestHelmHandler: + def setup_method(self): + self.handler = _HelmHandler() + + def test_accepts_index_yaml(self): + assert self.handler.accepts_path("index.yaml") is True + + def test_rejects_tgz_path(self): + assert self.handler.accepts_path("vault-0.27.0.tgz") is False + + def test_rejects_subdirectory_index(self): + assert self.handler.accepts_path("charts/index.yaml") is False + + def test_rejects_empty_path(self): + assert self.handler.accepts_path("") is False + + def test_path_error_is_non_empty_string(self): + msg = self.handler.path_error() + assert isinstance(msg, str) and len(msg) > 0 + + def test_merge_returns_bytes(self): + with patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve): + result = self.handler.merge([_INDEX_A], ["member-a"], [_CFG_A], "http://proxy.example.com") + assert isinstance(result, bytes) + + def test_merge_delegates_to_merge_helm_indexes(self): + with patch("artifactapi.artifact.virtual._merge_helm_indexes", return_value=b"merged") as mock_fn: + result = self.handler.merge([b"data"], ["m"], [{}], "http://proxy") + mock_fn.assert_called_once_with([b"data"], ["m"], [{}], "http://proxy") + assert result == b"merged" + + +# --------------------------------------------------------------------------- +# _HANDLERS registry +# --------------------------------------------------------------------------- + + +class TestHandlersRegistry: + def test_helm_handler_is_registered(self): + assert "helm" in _HANDLERS + assert isinstance(_HANDLERS["helm"], _HelmHandler) + + def test_helm_handler_satisfies_protocol(self): + assert isinstance(_HANDLERS["helm"], _VirtualHandler) + + +# --------------------------------------------------------------------------- +# _merge_helm_indexes +# --------------------------------------------------------------------------- + + +class TestMergeHelmIndexes: + def _merge(self, raw_indexes, member_names, member_configs, proxy_base="http://proxy.example.com"): + with patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve): + return _merge_helm_indexes(raw_indexes, member_names, member_configs, proxy_base) + + def _parse(self, raw): + return yaml.safe_load(raw) + + def test_single_member_all_charts_present(self): + index = self._parse(self._merge([_INDEX_A], ["member-a"], [_CFG_A])) + assert "vault" in index["entries"] + assert "consul" in index["entries"] + + def test_two_members_non_overlapping_charts_all_present(self): + index = self._parse(self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B])) + assert "vault" in index["entries"] + assert "consul" in index["entries"] + assert "nginx" in index["entries"] + + def test_first_member_wins_on_duplicate_name_and_version(self): + index = self._parse(self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B])) + v027 = next(e for e in index["entries"]["vault"] if e["version"] == "0.27.0") + assert "helm.releases.hashicorp.com" in v027["urls"][0] + + def test_different_versions_of_same_chart_both_included(self): + index = self._parse(self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B])) + versions = {e["version"] for e in index["entries"]["vault"]} + assert "0.27.0" in versions + assert "0.26.0" in versions + + def test_malformed_yaml_from_member_is_skipped(self): + index = self._parse(self._merge([_INDEX_A, b"{bad yaml"], ["member-a", "bad"], [_CFG_A, _CFG_B])) + assert "vault" in index["entries"] + assert "consul" in index["entries"] + + def test_output_has_apiVersion_v1(self): + index = self._parse(self._merge([_INDEX_A], ["member-a"], [_CFG_A])) + assert index["apiVersion"] == "v1" + + def test_output_has_generated_field(self): + index = self._parse(self._merge([_INDEX_A], ["member-a"], [_CFG_A])) + assert "generated" in index + + def test_output_is_valid_yaml(self): + raw = self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B]) + assert isinstance(yaml.safe_load(raw), dict) + + def test_empty_index_from_member_produces_no_entries(self): + empty = b"apiVersion: v1\nentries: {}\ngenerated: '2023-01-01T00:00:00.000Z'\n" + index = self._parse(self._merge([empty], ["member-a"], [_CFG_A])) + assert index["entries"] == {} + + +# --------------------------------------------------------------------------- +# _get_member_index (async) +# --------------------------------------------------------------------------- + + +class TestGetMemberIndex: + @pytest.fixture + def storage(self): + m = MagicMock() + m.get_object_key.return_value = "member/key/index.yaml" + m.exists.return_value = False + m.download_object.return_value = b"cached bytes" + return m + + @pytest.fixture + def cache(self): + m = MagicMock() + m.is_index_valid.return_value = False + return m + + @pytest.fixture + def member_cfg(self): + return {"base_url": "https://helm.releases.hashicorp.com", "cache": {"mutable_ttl": 3600}} + + def _fake_response(self, content=b"upstream bytes"): + r = MagicMock() + r.content = content + r.raise_for_status = MagicMock() + return r + + def _patch_httpx(self, response): + mock_client_cls = patch("artifactapi.artifact.virtual.httpx.AsyncClient") + p = mock_client_cls.start() + mock_client = AsyncMock() + p.return_value.__aenter__.return_value = mock_client + mock_client.get.return_value = response + return mock_client_cls, mock_client + + async def test_cache_hit_returns_stored_bytes(self, storage, cache, member_cfg): + storage.exists.return_value = True + cache.is_index_valid.return_value = True + + _, _, _, raw_data = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) + + assert raw_data == b"cached bytes" + + async def test_cache_hit_does_not_fetch_upstream(self, storage, cache, member_cfg): + storage.exists.return_value = True + cache.is_index_valid.return_value = True + + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + await _get_member_index("m", member_cfg, "index.yaml", storage, cache) + + mock_cls.assert_not_called() + + async def test_cache_hit_storage_error_falls_through_to_upstream(self, storage, cache, member_cfg): + storage.exists.return_value = True + cache.is_index_valid.return_value = True + storage.download_object.side_effect = Exception("S3 read error") + + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + mock_client = AsyncMock() + mock_cls.return_value.__aenter__.return_value = mock_client + mock_client.get.return_value = self._fake_response(b"fresh bytes") + + _, _, _, raw_data = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) + + assert raw_data == b"fresh bytes" + + async def test_cache_miss_fetches_from_upstream(self, storage, cache, member_cfg): + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + mock_client = AsyncMock() + mock_cls.return_value.__aenter__.return_value = mock_client + mock_client.get.return_value = self._fake_response() + + _, _, _, raw_data = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) + + assert raw_data == b"upstream bytes" + + async def test_cache_miss_stores_result_in_s3(self, storage, cache, member_cfg): + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + mock_client = AsyncMock() + mock_cls.return_value.__aenter__.return_value = mock_client + mock_client.get.return_value = self._fake_response() + + await _get_member_index("m", member_cfg, "index.yaml", storage, cache) + + storage.upload.assert_called_once() + + async def test_cache_miss_marks_cache_with_configured_ttl(self, storage, cache, member_cfg): + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + mock_client = AsyncMock() + mock_cls.return_value.__aenter__.return_value = mock_client + mock_client.get.return_value = self._fake_response() + + await _get_member_index("m", member_cfg, "index.yaml", storage, cache) + + cache.mark_index_cached.assert_called_once_with("m", "index.yaml", 3600) + + async def test_cache_miss_with_auth_sends_basic_auth_header(self, storage, cache): + cfg = { + "base_url": "https://private.example.com", + "username": "user", + "password": "pass", + "cache": {"mutable_ttl": 3600}, + } + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + mock_client = AsyncMock() + mock_cls.return_value.__aenter__.return_value = mock_client + mock_client.get.return_value = self._fake_response() + + await _get_member_index("m", cfg, "index.yaml", storage, cache) + + headers = mock_client.get.call_args.kwargs["headers"] + assert "Authorization" in headers + assert headers["Authorization"].startswith("Basic ") + + async def test_no_credentials_sends_no_auth_header(self, storage, cache, member_cfg): + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + mock_client = AsyncMock() + mock_cls.return_value.__aenter__.return_value = mock_client + mock_client.get.return_value = self._fake_response() + + await _get_member_index("m", member_cfg, "index.yaml", storage, cache) + + headers = mock_client.get.call_args.kwargs["headers"] + assert "Authorization" not in headers + + async def test_upstream_fetch_failure_returns_none(self, storage, cache, member_cfg): + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + mock_client = AsyncMock() + mock_cls.return_value.__aenter__.return_value = mock_client + mock_client.get.side_effect = Exception("connection refused") + + _, _, _, raw_data = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) + + assert raw_data is None + + async def test_s3_upload_failure_still_returns_data(self, storage, cache, member_cfg): + storage.upload.side_effect = Exception("S3 write error") + + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + mock_client = AsyncMock() + mock_cls.return_value.__aenter__.return_value = mock_client + mock_client.get.return_value = self._fake_response() + + _, _, _, raw_data = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) + + assert raw_data == b"upstream bytes" + + async def test_returns_ttl_from_config(self, storage, cache): + cfg = {"base_url": "https://example.com", "cache": {"mutable_ttl": 900}} + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + mock_client = AsyncMock() + mock_cls.return_value.__aenter__.return_value = mock_client + mock_client.get.return_value = self._fake_response() + + _, _, ttl, _ = await _get_member_index("m", cfg, "index.yaml", storage, cache) + + assert ttl == 900 + + async def test_defaults_ttl_to_3600_when_not_configured(self, storage, cache): + cfg = {"base_url": "https://example.com"} + with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: + mock_client = AsyncMock() + mock_cls.return_value.__aenter__.return_value = mock_client + mock_client.get.return_value = self._fake_response() + + _, _, ttl, _ = await _get_member_index("m", cfg, "index.yaml", storage, cache) + + assert ttl == 3600 + + +# --------------------------------------------------------------------------- +# Virtual route GET /api/v1/virtual/{name}/{path} +# --------------------------------------------------------------------------- + + +@pytest.fixture +def mock_storage_v(): + m = MagicMock() + m.get_object_key.return_value = "virtual/helm-virtual-test/index.yaml" + m.exists.return_value = False + m.download_object.return_value = b"apiVersion: v1\nentries: {}\n" + return m + + +@pytest.fixture +def mock_cache_v(): + m = MagicMock() + m.is_index_valid.return_value = False + m.available = False + m.client = None + return m + + +@pytest.fixture +def patched_virtual_deps(mock_storage_v, mock_cache_v): + import artifactapi.main as main_mod + + with ( + patch.object(main_mod, "storage", mock_storage_v), + patch.object(main_mod, "cache", mock_cache_v), + ): + yield {"storage": mock_storage_v, "cache": mock_cache_v} + + +class TestVirtualRoute: + def test_unknown_virtual_name_returns_404(self, client, patched_virtual_deps): + response = client.get("/api/v1/virtual/no-such-virtual/index.yaml") + assert response.status_code == 404 + + def test_non_virtual_type_returns_400(self, client, patched_virtual_deps): + # helm-test is type "remote", not "virtual" + response = client.get("/api/v1/virtual/helm-test/index.yaml") + assert response.status_code == 400 + + def test_unsupported_package_returns_400(self, client, patched_virtual_deps): + # unsupported-virtual-test has package "rpm" + response = client.get("/api/v1/virtual/unsupported-virtual-test/index.yaml") + assert response.status_code == 400 + + def test_non_index_path_returns_404(self, client, patched_virtual_deps): + response = client.get("/api/v1/virtual/helm-virtual-test/vault-0.27.0.tgz") + assert response.status_code == 404 + + def test_no_members_returns_500(self, client, patched_virtual_deps): + response = client.get("/api/v1/virtual/empty-virtual-test/index.yaml") + assert response.status_code == 500 + + def test_virtual_cache_hit_returns_200(self, client, patched_virtual_deps): + deps = patched_virtual_deps + deps["storage"].exists.return_value = True + deps["cache"].is_index_valid.return_value = True + + response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + assert response.status_code == 200 + + def test_virtual_cache_hit_content_type_is_yaml(self, client, patched_virtual_deps): + deps = patched_virtual_deps + deps["storage"].exists.return_value = True + deps["cache"].is_index_valid.return_value = True + + response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + assert "text/yaml" in response.headers["content-type"] + + def test_virtual_cache_hit_returns_stored_content(self, client, patched_virtual_deps): + deps = patched_virtual_deps + deps["storage"].exists.return_value = True + deps["cache"].is_index_valid.return_value = True + deps["storage"].download_object.return_value = b"apiVersion: v1\nentries: {}\n" + + response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + assert response.content == b"apiVersion: v1\nentries: {}\n" + + def test_virtual_cache_hit_skips_member_fetch(self, client, patched_virtual_deps): + deps = patched_virtual_deps + deps["storage"].exists.return_value = True + deps["cache"].is_index_valid.return_value = True + + with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: + client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + + mock_get.assert_not_called() + + def test_cache_miss_returns_200_with_yaml_content_type(self, client, patched_virtual_deps): + with ( + patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get, + patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve), + ): + mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) + response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + + assert response.status_code == 200 + assert "text/yaml" in response.headers["content-type"] + + def test_cache_miss_response_contains_merged_entries(self, client, patched_virtual_deps): + with ( + patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get, + patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve), + ): + mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) + response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + + index = yaml.safe_load(response.content) + assert "mychart" in index["entries"] + + def test_cache_miss_stores_result_in_s3(self, client, patched_virtual_deps): + deps = patched_virtual_deps + with ( + patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get, + patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve), + ): + mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) + client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + + deps["storage"].upload.assert_called_once() + + def test_cache_miss_marks_index_cached(self, client, patched_virtual_deps): + deps = patched_virtual_deps + with ( + patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get, + patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve), + ): + mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) + client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + + deps["cache"].mark_index_cached.assert_called_once() + + def test_cache_miss_uses_min_ttl_across_members(self, client, patched_virtual_deps): + deps = patched_virtual_deps + with ( + patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get, + patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve), + ): + mock_get.side_effect = [ + ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE), + ("helm-member-2", _CFG_B, 1800, _INDEX_SIMPLE), + ] + client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + + _, _, ttl = deps["cache"].mark_index_cached.call_args[0] + assert ttl == 1800 + + def test_all_members_unreachable_returns_502(self, client, patched_virtual_deps): + with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: + mock_get.return_value = ("helm-test", _CFG_A, 3600, None) + response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + + assert response.status_code == 502 + + def test_one_member_unreachable_still_returns_200(self, client, patched_virtual_deps): + with ( + patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get, + patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve), + ): + mock_get.side_effect = [ + ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE), + ("helm-member-2", _CFG_B, 1800, None), + ] + response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + + assert response.status_code == 200 + + def test_member_not_in_config_is_skipped(self, client, patched_virtual_deps): + import artifactapi.main as main_mod + + real_get = main_mod.config.get_remote_config + + def patched_get(name): + return None if name == "helm-member-2" else real_get(name) + + with ( + patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get, + patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve), + patch.object(main_mod.config, "get_remote_config", side_effect=patched_get), + ): + mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) + response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + + # only helm-test was available — should succeed + assert response.status_code == 200 + mock_get.assert_called_once() + + def test_s3_store_failure_still_returns_200(self, client, patched_virtual_deps): + deps = patched_virtual_deps + deps["storage"].upload.side_effect = Exception("S3 write error") + + with ( + patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get, + patch("artifactapi.artifact.virtual._helm.resolve_content", side_effect=_identity_resolve), + ): + mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE) + response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") + + assert response.status_code == 200