diff --git a/.gitignore b/.gitignore index 50b8125..83b008d 100644 --- a/.gitignore +++ b/.gitignore @@ -35,7 +35,6 @@ env/ # Environment variables .env -remotes.yaml # Logs *.log diff --git a/README.md b/README.md index 8138ee3..d09d4d6 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,13 @@ A generic FastAPI-based artifact caching system that downloads and stores files - **Generic Remote Support**: Works with any HTTP-based file server (GitHub, Gitea, HashiCorp, custom servers) - **Configuration-Based**: YAML configuration for remotes, patterns, and access control -- **Direct URL API**: Access cached files via clean URLs like `/api/github/owner/repo/path/file.tar.gz` -- **Pattern Filtering**: Regex-based inclusion patterns for security and organization +- **Direct URL API**: Access cached files via clean URLs like `/api/v1/remote/github/owner/repo/path/file.tar.gz` +- **Immutable/Mutable Pattern Model**: Per-remote regex patterns distinguish forever-cached artifacts from TTL-expiring metadata - **Smart Caching**: Automatic download and cache on first access, serve from cache afterward +- **Conditional Revalidation**: Optional `check_mutable_updates` flag — sends `If-None-Match`/`If-Modified-Since` on expiry; skips re-download on 304 +- **Stale-on-Upstream-Error**: Expired mutable files are kept and their TTL refreshed when the backend cannot be reached, so cached data remains available during upstream outages - **S3 Storage**: MinIO/S3 backend with predictable paths +- **Docker Registry Proxy**: Full Docker Registry HTTP API v2 for transparent container image caching - **Content-Type Detection**: Automatic MIME type detection for downloads ## Architecture @@ -71,15 +74,18 @@ The system uses `remotes.yaml` to define remote repositories and access patterns remotes: remote-name: base_url: "https://example.com" # Base URL for the remote - type: "remote" # Type: "remote" or "local" - package: "generic" # Package type: "generic", "alpine", "rpm" + type: "remote" # 
"remote" or "local" + package: "generic" # "generic", "alpine", "rpm", or "docker" description: "Human readable description" - include_patterns: # Regex patterns for allowed files + immutable_patterns: # Files cached forever (release binaries, versioned tags) - "pattern1" - "pattern2" - cache: # Cache configuration (optional) - file_ttl: 0 # File cache TTL (0 = indefinite) - index_ttl: 300 # Index file TTL in seconds + mutable_patterns: # Files that expire after mutable_ttl (optional) + - "pattern3" + check_mutable_updates: false # Enable conditional HEAD before re-fetching (optional) + cache: + immutable_ttl: 0 # TTL for immutable files (0 = indefinitely) + mutable_ttl: 3600 # TTL in seconds for mutable files ``` ### Remote Types @@ -94,30 +100,30 @@ remotes: type: "remote" package: "generic" description: "GitHub releases and files" - include_patterns: + immutable_patterns: - "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*" - "lxc/incus/.*\\.tar\\.gz$" - "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$" cache: - file_ttl: 0 # Cache files indefinitely - index_ttl: 0 # No index files for generic remotes + immutable_ttl: 0 # Cache files indefinitely - hashicorp-releases: - base_url: "https://releases.hashicorp.com" + github-archive: + base_url: "https://github.com" type: "remote" package: "generic" - description: "HashiCorp product releases" - include_patterns: - - "terraform/.*terraform_.*_linux_amd64\\.zip$" - - "vault/.*vault_.*_linux_amd64\\.zip$" - - "consul/.*/consul_.*_linux_amd64\\.zip$" + description: "GitHub repository archive tarballs" + immutable_patterns: + - ".*/archive/refs/tags/.*\\.tar\\.gz$" # tag archives never change + mutable_patterns: + - ".*/archive/refs/heads/main\\.tar\\.gz$" # branch archives can change + check_mutable_updates: true # send If-None-Match on expiry; skip re-download on 304 cache: - file_ttl: 0 - index_ttl: 0 + immutable_ttl: 0 + mutable_ttl: 86400 # re-check branch archives after 1 day ``` #### 
Package Repository Remotes -For Linux package repositories with index files: +For Linux package repositories: ```yaml remotes: @@ -126,23 +132,25 @@ remotes: type: "remote" package: "alpine" description: "Alpine Linux APK package repository" - include_patterns: - - ".*/x86_64/.*\\.apk$" # Only x86_64 packages + immutable_patterns: + - ".*/x86_64/.*\\.apk$" # packages are immutable by content-hash + # APKINDEX.tar.gz is a package-type default mutable file — no mutable_patterns needed cache: - file_ttl: 0 # Cache packages indefinitely - index_ttl: 7200 # Cache APKINDEX.tar.gz for 2 hours + immutable_ttl: 0 + mutable_ttl: 7200 # re-fetch APKINDEX.tar.gz after 2 hours almalinux: - base_url: "http://mirror.aarnet.edu.au/pub/almalinux" + base_url: "https://mirror.example.com/almalinux" type: "remote" package: "rpm" description: "AlmaLinux RPM package repository" - include_patterns: + immutable_patterns: - ".*/x86_64/.*\\.rpm$" - ".*/noarch/.*\\.rpm$" + # repomd.xml and repodata/* are package-type defaults cache: - file_ttl: 0 - index_ttl: 7200 # Cache metadata files for 2 hours + immutable_ttl: 0 + mutable_ttl: 7200 ``` #### Local Repositories @@ -155,62 +163,45 @@ remotes: package: "generic" description: "Local generic file repository" cache: - file_ttl: 0 - index_ttl: 0 + immutable_ttl: 0 + mutable_ttl: 0 ``` -### Include Patterns +### Immutable Patterns -Include patterns are regular expressions that control which files can be accessed. Patterns use Python `re.search`, so they match anywhere in the path unless anchored with `^` or `$`. Only files matching at least one pattern are served; all others return HTTP 403. +`immutable_patterns` are regular expressions that control which files can be accessed. Patterns use Python `re.search`, so they match anywhere in the path unless anchored with `^` or `$`. Only files matching at least one pattern are served; all others return HTTP 403. + +Matched files are cached with `immutable_ttl` (default 0 = forever). 
Use these for versioned release artifacts that never change once published. ```yaml -include_patterns: - # Exact project + architecture — most restrictive +immutable_patterns: - "^gruntwork-io/terragrunt/releases/download/.*/terragrunt_linux_amd64$" - - # Any release asset for a project, any version - "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*" - - # File extension only — allow all files of a given type from any path - ".*\\.tar\\.gz$" - - ".*\\.rpm$" - - ".*\\.zip$" - - # Architecture subtree — allow everything under x86_64/ - - ".*/x86_64/.*" - - # Combined: architecture + extension - ".*/x86_64/.*\\.rpm$" - ".*/noarch/.*\\.rpm$" - - # Docker image names (used with package: docker remotes) - - "^library/nginx" # nginx official images only - - "^rancher/" # all rancher/* images - - "^rancher/rke2-runtime" # specific image - - # Repodata directories — allow all metadata for an RPM repo - ".*/repodata/.*$" ``` -**Security note**: Omitting `include_patterns` entirely allows all files from that remote. Index files (e.g. `APKINDEX.tar.gz`, `repomd.xml`, tag manifests) always bypass pattern enforcement — they are served unconditionally so clients can discover available packages. +**Security note**: Omitting `immutable_patterns` entirely allows all files from that remote. -### Index Patterns +### Mutable Patterns -Index patterns identify repository metadata files. Index files get special treatment: -- **Always served** regardless of `include_patterns` -- **Cached with `index_ttl`** instead of `file_ttl` -- **Automatically refreshed** when the TTL expires — the cached copy is evicted and re-fetched on next request +`mutable_patterns` identify files that change over time (index files, branch archives, metadata). 
Mutable files: +- **Always served** regardless of `immutable_patterns` +- **Cached with `mutable_ttl`** and re-fetched from upstream when the TTL expires +- **Kept stale** when the upstream backend is unreachable — TTL is refreshed automatically so the cached copy remains available until the backend recovers (see below) -Built-in defaults per package type: +Built-in defaults per package type (no configuration needed): -| Package type | Built-in index patterns | +| Package type | Built-in mutable patterns | |---|---| | `alpine` | `APKINDEX\.tar\.gz$` | | `rpm` | `repomd\.xml$`, `repodata/` metadata (xml, sqlite, yaml, asc, txt variants), `Packages\.gz$` | | `docker` | Tag manifests (non-digest refs), `/tags/list` | | `generic` | *(none)* | -Use `index_patterns` to add extra patterns on top of the defaults. Duplicates are ignored automatically. +Use `mutable_patterns` to add extra patterns on top of the defaults. Duplicates are ignored automatically. ```yaml remotes: @@ -218,60 +209,74 @@ remotes: base_url: "https://charts.example.com" type: "remote" package: "generic" - include_patterns: - - ".*\\.tgz$" # chart archives - index_patterns: - - "index\\.yaml$" # Helm repo index — re-fetched on every TTL expiry + immutable_patterns: + - ".*\\.tgz$" + mutable_patterns: + - "index\\.yaml$" # Helm repo index cache: - file_ttl: 0 - index_ttl: 600 # re-check the index every 10 minutes + immutable_ttl: 0 + mutable_ttl: 600 # re-check the index every 10 minutes apt-mirror: base_url: "https://apt.example.com" type: "remote" package: "generic" - include_patterns: + immutable_patterns: - ".*\\.deb$" - index_patterns: - - "InRelease$" # signed APT release file - - "Release$" # unsigned APT release file - - "Packages\\.gz$" # compressed package list + mutable_patterns: + - "InRelease$" + - "Release$" + - "Packages\\.gz$" - "Packages\\.xz$" cache: - file_ttl: 0 - index_ttl: 3600 # hourly index refresh - - almalinux-with-extras: - base_url: "https://mirror.example.com/almalinux" - 
type: "remote" - package: "rpm" # inherits repomd.xml + repodata/* defaults - include_patterns: - - ".*/x86_64/.*\\.rpm$" - - ".*/noarch/.*\\.rpm$" - index_patterns: - - "comps\\.xml$" # optional group metadata (adds to rpm defaults) - cache: - file_ttl: 0 - index_ttl: 7200 + immutable_ttl: 0 + mutable_ttl: 3600 ``` -Pattern matching uses `re.search`, so `"index\\.yaml$"` matches `/stable/index.yaml` and `/index.yaml`. Anchor with `^` to restrict to the path root. +### Conditional Revalidation (`check_mutable_updates`) + +By default, when a mutable file's TTL expires the cached copy is evicted and the full file is re-downloaded on the next request. Setting `check_mutable_updates: true` on a remote enables a cheaper conditional check first: + +1. On TTL expiry, a `HEAD` request is sent to the upstream with `If-None-Match` / `If-Modified-Since` headers (populated from the original download). +2. If the upstream replies **304 Not Modified**, the TTL is refreshed in place — no re-download, no S3 traffic. +3. If the upstream replies **200**, the cached copy is evicted and re-downloaded normally. + +This only applies to user-defined `mutable_patterns`. Package-type built-in patterns (APKINDEX, repomd.xml, Docker manifests) are always re-fetched unconditionally. + +```yaml +remotes: + github-archive: + base_url: "https://github.com" + type: "remote" + package: "generic" + immutable_patterns: + - ".*/archive/refs/tags/.*\\.tar\\.gz$" + mutable_patterns: + - ".*/archive/refs/heads/main\\.tar\\.gz$" + check_mutable_updates: true + cache: + immutable_ttl: 0 + mutable_ttl: 86400 +``` + +### Stale-on-Upstream-Error + +When a mutable file's TTL expires and the upstream backend **cannot be reached** (connection refused, DNS failure, timeout), the cached copy is **kept and its TTL refreshed** rather than evicted. This means: + +- RPM repodata, Alpine indexes, branch archives, and other mutable files remain available during upstream outages. 
+- Clients continue to receive the last-known-good copy without errors. +- Once the backend recovers and the refreshed TTL next expires, normal eviction resumes. + +This behaviour is automatic and requires no configuration. Only network-level failures trigger it — HTTP error responses (404, 503, etc.) are treated as the backend being reachable and proceed with normal expiry. ### Cache Configuration -Control how long different file types are cached: - ```yaml cache: - file_ttl: 0 # Regular files (0 = cache indefinitely) - index_ttl: 300 # Index files like APKINDEX.tar.gz (seconds) + immutable_ttl: 0 # Immutable files (0 = cache indefinitely, rarely changed) + mutable_ttl: 3600 # Mutable files — TTL in seconds before re-fetch is attempted ``` -**Index Files**: Repository metadata files that change frequently: -- Alpine: `APKINDEX.tar.gz` -- RPM: `repomd.xml`, `*-primary.xml.gz`, etc. -- These are automatically detected and use `index_ttl` - ### Environment Variables All runtime configuration comes from environment variables: @@ -351,26 +356,26 @@ data: type: "remote" package: "generic" description: "GitHub releases and files" - include_patterns: + immutable_patterns: - "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*" - "lxc/incus/.*\\.tar\\.gz$" - "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$" cache: - file_ttl: 0 - index_ttl: 0 + immutable_ttl: 0 + mutable_ttl: 0 hashicorp-releases: base_url: "https://releases.hashicorp.com" type: "remote" package: "generic" description: "HashiCorp product releases" - include_patterns: + immutable_patterns: - "terraform/.*terraform_.*_linux_amd64\\.zip$" - "vault/.*vault_.*_linux_amd64\\.zip$" - "consul/.*/consul_.*_linux_amd64\\.zip$" cache: - file_ttl: 0 - index_ttl: 0 + immutable_ttl: 0 + mutable_ttl: 0 ``` ### 3. 
Secret for Environment Variables @@ -778,8 +783,8 @@ remotes: username: "your-dockerhub-username" password: "your-dockerhub-token" # PAT with read scope cache: - file_ttl: 0 - index_ttl: 300 + immutable_ttl: 0 + mutable_ttl: 300 ``` A pull of `nginx:latest` becomes `/v2/dockerhub/library/nginx/manifests/latest` on the artifact API. @@ -804,8 +809,8 @@ remotes: username: "your-github-username" password: "ghp_your_github_pat" # read:packages scope required cache: - file_ttl: 0 - index_ttl: 300 + immutable_ttl: 0 + mutable_ttl: 300 ``` A pull of `ghcr.io/rancher/rke2-runtime:v1.30.0-rke2r1` becomes `/v2/ghcr/rancher/rke2-runtime/manifests/v1.30.0-rke2r1`. @@ -844,7 +849,7 @@ Each entry needs a matching remote in `remotes.yaml` using the name from the rew #### Restricting which images are cached -Use `include_patterns` on the remote to allow only specific images through the proxy. Requests for images not matching any pattern return HTTP 403 to the node. +Use `immutable_patterns` on the remote to allow only specific images through the proxy. Requests for images not matching any pattern return HTTP 403 to the node. ```yaml remotes: @@ -852,17 +857,17 @@ remotes: base_url: "https://registry-1.docker.io" type: "remote" package: "docker" - include_patterns: + immutable_patterns: - "^library/nginx" # official nginx only - "^library/redis" # official redis only - "^rancher/" # all rancher images - "^grafana/grafana" # specific image cache: - file_ttl: 0 - index_ttl: 300 + immutable_ttl: 0 + mutable_ttl: 300 ``` -Omit `include_patterns` to allow all images from that registry. +Omit `immutable_patterns` to allow all images from that registry. #### TLS configuration diff --git a/docker-compose.yml b/docker-compose.yml index 1552b8a..81e94ae 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,12 +6,12 @@ services: context: . 
dockerfile: Dockerfile args: - - VERSION=dev + - VERSION=2.2.2.dev0 ports: - "8000:8000" volumes: - - ./remotes.yaml:/app/remotes.yaml:ro - - ./ca-bundle.pem:/app/ca-bundle.pem:ro + - ./remotes.yaml:/app/remotes.yaml:ro,z + - ./ca-bundle.pem:/app/ca-bundle.pem:ro,z environment: - CONFIG_PATH=/app/remotes.yaml - DBHOST=postgres diff --git a/remotes.yaml b/remotes.yaml new file mode 100644 index 0000000..d312461 --- /dev/null +++ b/remotes.yaml @@ -0,0 +1,203 @@ +# Example remotes configuration — copy and adapt for your environment. +# +# immutable_patterns: artifacts cached forever (e.g. release binaries, versioned tags). +# mutable_patterns: artifacts that expire after cache.mutable_ttl seconds and are +# re-fetched from upstream on next request (e.g. index files, +# branch archives). Defaults to the package-type built-ins when +# not set (APKINDEX, repomd.xml, Docker manifests, etc.). +# cache: +# immutable_ttl: TTL for immutable files (0 = forever; rarely needs changing). +# mutable_ttl: TTL in seconds for mutable files. Omit to use the default (3600). +# +# WARNING: this file may contain credentials — do not commit real values. 
+# +# Global configuration +#s3: +# endpoint: "localhost:9000" +# access_key: "minioadmin" +# secret_key: "minioadmin" +# bucket: "artifacts" +# secure: false +# +#redis: +# url: "redis://localhost:6379/0" +# +#database: +# url: "postgresql://artifacts:artifacts123@localhost:5432/artifacts" +# +remotes: + github: + base_url: "https://github.com" + type: "remote" + package: "generic" + description: "GitHub releases and files" + immutable_patterns: + - "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*" + - "lxc/incus/.*\\.tar\\.gz$" + - "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$" + - "VictoriaMetrics/VictoriaMetrics/.*/vmutils-linux-amd64-.*\\.tar\\.gz$" + - "VictoriaMetrics/VictoriaMetrics/.*/victoria-metrics-linux-amd64-.*-cluster\\.tar\\.gz$" + - "VictoriaMetrics/VictoriaMetrics/.*/victoria-logs-linux-amd64-.*\\.tar\\.gz$" + - "VictoriaMetrics/VictoriaMetrics/.*/vlutils-linux-amd64-.*\\.tar\\.gz$" + - "prometheus-community/bind_exporter/.*/bind_exporter-.*\\.linux-amd64\\.tar\\.gz$" + - "prometheus-community/pgbouncer_exporter/.*/pgbouncer_exporter-.*\\.linux-amd64\\.tar\\.gz$" + - "prometheus-community/postgres_exporter/.*/postgres_exporter-.*\\.linux-amd64\\.tar\\.gz$" + - "onedr0p/exportarr/.*/exportarr_.*_linux_amd64\\.tar\\.gz$" + - "tynany/frr_exporter/.*/frr_exporter-.*\\.linux-amd64\\.tar\\.gz$" + - "camptocamp/prometheus-puppetdb-exporter/.*/prometheus-puppetdb-exporter-.*\\.linux-amd64\\.tar\\.gz$" + - "grafana/jsonnet-language-server/.*/jsonnet-language-server_.*_linux_amd64$" + - "helmfile/helmfile/.*/helmfile_.*_linux_amd64\\.tar\\.gz$" + - "helmfile/vals/.*/vals_.*_linux_amd64\\.tar\\.gz$" + - "openbao/openbao-plugins/.*/openbao-plugin-secrets-consul_linux_amd64_.*\\.tar\\.gz$" + - "openbao/openbao-plugins/.*/openbao-plugin-secrets-nomad_linux_amd64_.*\\.tar\\.gz$" + - "apple/foundationdb/.*/libfdb_c\\.x86_64\\.so$" + - "stalwartlabs/stalwart/.*/stalwart-cli-x86_64-unknown-linux-gnu\\.tar\\.gz$" + - 
"stalwartlabs/stalwart/.*/stalwart-foundationdb-x86_64-unknown-linux-gnu\\.tar\\.gz$" + - "stalwartlabs/stalwart/.*/stalwart-x86_64-unknown-linux-gnu\\.tar\\.gz$" + cache: + immutable_ttl: 0 # Files cached indefinitely + mutable_ttl: 0 + + github-archive: + base_url: "https://github.com" + type: "remote" + package: "generic" + description: "GitHub repository archive tarballs" + immutable_patterns: + # Tag archives are immutable — a tag never changes + - ".*/archive/refs/tags/.*\\.tar\\.gz$" + mutable_patterns: + # Branch archives can change on every push + - ".*/archive/refs/heads/main\\.tar\\.gz$" + - ".*/archive/refs/heads/master\\.tar\\.gz$" + # Before re-downloading an expired branch archive, check whether it has + # actually changed (304 Not Modified → just refresh the TTL, no transfer). + # Only applies to user-defined mutable_patterns, not package-type defaults. + check_mutable_updates: true + cache: + immutable_ttl: 0 # Tag archives cached indefinitely + mutable_ttl: 86400 # Branch archives refreshed after 1 day + + gitea-dl: + base_url: "https://dl.gitea.com" + type: "remote" + package: "generic" + description: "Gitea download site" + immutable_patterns: + - "act_runner/.*/act_runner-.*-linux-amd64$" + cache: + immutable_ttl: 0 # Files cached indefinitely + mutable_ttl: 0 + + hashicorp-releases: + base_url: "https://releases.hashicorp.com" + type: "remote" + package: "generic" + description: "HashiCorp product releases" + immutable_patterns: + - "terraform/.*terraform_.*_linux_amd64\\.zip$" + - "terraform/.*terraform_.*_windows_amd64\\.zip$" + - "terraform/.*terraform_.*_darwin_amd64\\.zip$" + - "vault/.*vault_.*_linux_amd64\\.zip$" + - "vault/.*vault_.*_windows_amd64\\.zip$" + - "vault/.*vault_.*_darwin_amd64\\.zip$" + - "consul-cni/.*/consul-cni_.*_linux_amd64\\.zip$" + - "consul/.*/consul_.*_linux_amd64\\.zip$" + - "nomad-autoscaler/.*/nomad-autoscaler_.*_linux_amd64\\.zip$" + - "nomad/.*/nomad_.*_linux_amd64\\.zip$" + - 
"packer/.*/packer_.*_linux_amd64\\.zip$" + cache: + immutable_ttl: 0 # Files cached indefinitely + mutable_ttl: 0 + + alpine: + base_url: "https://dl-cdn.alpinelinux.org" + type: "remote" + package: "alpine" + description: "Alpine Linux APK package repository" + immutable_patterns: + - ".*/x86_64/.*\\.apk$" + # check_mutable_updates not set: APKINDEX.tar.gz is a package-type default + # and is always re-fetched on expiry — conditional checks are skipped for + # built-in mutable patterns regardless of this flag. + cache: + immutable_ttl: 0 # Files cached indefinitely + mutable_ttl: 7200 # Index files (APKINDEX.tar.gz) cached for 2 hours + + almalinux: + base_url: "https://gsl-syd.mm.fcix.net/almalinux" + type: "remote" + package: "rpm" + description: "AlmaLinux RPM package repository" + immutable_patterns: + - ".*/x86_64/.*\\.rpm$" + - ".*/noarch/.*\\.rpm$" + - ".*/repodata/.*$" + - ".*\\.rpm$" # Allow all RPM files + # repomd.xml / repodata are package-type defaults — always re-fetched on + # expiry. check_mutable_updates would only apply to any custom + # mutable_patterns added here. 
+ cache: + immutable_ttl: 0 # Files cached indefinitely + mutable_ttl: 7200 # Metadata files cached for 2 hours + + epel: + base_url: "http://mirror.aarnet.edu.au/pub/epel" + type: "remote" + package: "rpm" + description: "EPEL (Extra Packages for Enterprise Linux)" + immutable_patterns: + - "8/Everything/x86_64/.*\\.rpm$" + - "9/Everything/x86_64/.*\\.rpm$" + - "10/Everything/x86_64/.*\\.rpm$" + - ".*/noarch/.*\\.rpm$" + - ".*/repodata/.*$" + cache: + immutable_ttl: 0 # Files cached indefinitely + mutable_ttl: 7200 # Metadata files cached for 2 hours + + fedora: + base_url: "https://gsl-syd.mm.fcix.net/fedora/linux" + type: "remote" + package: "rpm" + description: "Fedora Linux RPM package repository" + immutable_patterns: + - "releases/.*/Everything/x86_64/.*\\.rpm$" + - "updates/.*/Everything/x86_64/.*\\.rpm$" + - "development/.*/Everything/x86_64/.*\\.rpm$" + - ".*/noarch/.*\\.rpm$" + - "updates/.*/Everything/x86_64/repodata/.*$" + cache: + immutable_ttl: 0 # Files cached indefinitely + mutable_ttl: 300 # Metadata files cached for 5 minutes + + ghcr: + base_url: "https://ghcr.io" + type: "remote" + package: "docker" + description: "GitHub Container Registry" + # username: "your-github-username" + # password: "your-github-pat" # needs read:packages scope + # Docker manifest/tag-list patterns are package-type defaults — always + # re-fetched on expiry. check_mutable_updates only applies to any custom + # mutable_patterns you add (e.g. a metadata endpoint). 
+ cache: + immutable_ttl: 0 + mutable_ttl: 300 + + dockerhub: + base_url: "https://registry-1.docker.io" + type: "remote" + package: "docker" + description: "Docker Hub registry" + cache: + immutable_ttl: 0 + mutable_ttl: 300 + + local-generic: + type: "local" + package: "generic" + description: "Local generic file repository" + cache: + immutable_ttl: 0 # Files cached indefinitely + mutable_ttl: 0 diff --git a/src/artifactapi/cache.py b/src/artifactapi/cache.py index 9379378..9e3940b 100644 --- a/src/artifactapi/cache.py +++ b/src/artifactapi/cache.py @@ -19,18 +19,20 @@ class RedisCache: self.client = None self.available = False - def is_index_file(self, file_path: str, patterns: list[str] | None = None) -> bool: - """Return True if file_path matches any of the index patterns.""" + def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool: + """Return True if file_path matches any of the mutable patterns.""" if patterns is None: patterns = [] return any(re.search(p, file_path) for p in patterns) def get_index_cache_key(self, remote_name: str, path: str) -> str: - """Generate cache key for index files""" return f"index:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}" - def is_index_valid(self, remote_name: str, path: str, ttl_override: int = None) -> bool: - """Check if index file is still valid (not expired)""" + def get_mutable_meta_key(self, remote_name: str, path: str) -> str: + return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}" + + def is_index_valid(self, remote_name: str, path: str) -> bool: + """Check if mutable file is still within its TTL window.""" if not self.available: return False @@ -41,7 +43,7 @@ class RedisCache: return False def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None: - """Mark index file as cached with TTL""" + """Set or refresh the TTL key for a mutable file.""" if not self.available: return @@ -51,13 +53,45 @@ class RedisCache: 
except Exception: pass + def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None: + """Persist ETag and Last-Modified for future conditional requests.""" + if not self.available: + return + data = {} + if etag: + data["etag"] = etag + if last_modified: + data["last_modified"] = last_modified + if not data: + return + try: + self.client.hset(self.get_mutable_meta_key(remote_name, path), mapping=data) + except Exception: + pass + + def get_mutable_meta(self, remote_name: str, path: str) -> dict: + """Return stored ETag/Last-Modified for a mutable file, or {}.""" + if not self.available: + return {} + try: + return self.client.hgetall(self.get_mutable_meta_key(remote_name, path)) or {} + except Exception: + return {} + + def delete_mutable_meta(self, remote_name: str, path: str) -> None: + if not self.available: + return + try: + self.client.delete(self.get_mutable_meta_key(remote_name, path)) + except Exception: + pass + def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None: - """Remove expired index from S3 storage""" + """Remove an expired mutable file from S3 and clear its Redis meta.""" if not self.available: return try: - # Construct the URL the same way as in the main flow import os from .config import ConfigManager @@ -69,9 +103,10 @@ class RedisCache: if remote_config: base_url = remote_config.get("base_url") if base_url: - # Use hierarchical path-based key (same as cache_single_artifact) s3_key = storage.get_object_key(remote_name, path) if storage.exists(s3_key): storage.client.delete_object(Bucket=storage.bucket, Key=s3_key) except Exception: pass + + self.delete_mutable_meta(remote_name, path) diff --git a/src/artifactapi/config.py b/src/artifactapi/config.py index 3a79bc7..ef138f3 100644 --- a/src/artifactapi/config.py +++ b/src/artifactapi/config.py @@ -3,7 +3,7 @@ import os import yaml -_PACKAGE_INDEX_PATTERNS: dict[str, list[str]] = { +_PACKAGE_MUTABLE_PATTERNS: dict[str, 
list[str]] = { "alpine": [ r"APKINDEX\.tar\.gz$", ], @@ -55,26 +55,21 @@ class ConfigManager: self._check_reload() return self.config.get("remotes", {}).get(remote_name) - def get_repository_patterns(self, remote_name: str, repo_path: str) -> list: + def get_immutable_patterns(self, remote_name: str, repo_path: str = "") -> list[str]: remote_config = self.get_remote_config(remote_name) if not remote_config: return [] repositories = remote_config.get("repositories", {}) - # Handle both dict (GitHub style) and list (Alpine style) repositories if isinstance(repositories, dict): repo_config = repositories.get(repo_path) if repo_config: - patterns = repo_config.get("include_patterns", []) + patterns = repo_config.get("immutable_patterns", []) else: - patterns = remote_config.get("include_patterns", []) - elif isinstance(repositories, list): - # For Alpine, repositories is just a list of allowed repo names - # Pattern matching is handled by the main include_patterns - patterns = remote_config.get("include_patterns", []) + patterns = remote_config.get("immutable_patterns", []) else: - patterns = remote_config.get("include_patterns", []) + patterns = remote_config.get("immutable_patterns", []) return patterns @@ -129,18 +124,25 @@ class ConfigManager: db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}" return {"url": db_url} - def get_index_patterns(self, remote_name: str) -> list[str]: - """Return index-file patterns for a remote. + def get_user_mutable_patterns(self, remote_name: str) -> list[str]: + """Return only user-configured mutable_patterns, excluding package-type defaults.""" + remote_config = self.get_remote_config(remote_name) + if not remote_config: + return [] + return remote_config.get("mutable_patterns", []) + + def get_mutable_patterns(self, remote_name: str) -> list[str]: + """Return mutable-file patterns for a remote (TTL is configured per-remote in cache.mutable_ttl). 
Merges the package-level defaults with any extra patterns listed under - ``index_patterns`` in the remote's config. + ``mutable_patterns`` in the remote's config. """ remote_config = self.get_remote_config(remote_name) if not remote_config: return [] package = remote_config.get("package", "generic") - defaults = _PACKAGE_INDEX_PATTERNS.get(package, []) - extra = remote_config.get("index_patterns", []) + defaults = _PACKAGE_MUTABLE_PATTERNS.get(package, []) + extra = remote_config.get("mutable_patterns", []) return defaults + [p for p in extra if p not in defaults] def get_cache_config(self, remote_name: str) -> dict: diff --git a/src/artifactapi/main.py b/src/artifactapi/main.py index 2173df3..148fc77 100644 --- a/src/artifactapi/main.py +++ b/src/artifactapi/main.py @@ -32,6 +32,10 @@ class ArtifactRequest(BaseModel): include_pattern: str +class UpstreamUnreachable(Exception): + """Raised when the upstream backend cannot be contacted (network or timeout error).""" + + # Configure logging logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") logger = logging.getLogger(__name__) @@ -87,8 +91,10 @@ def flush_cache( if cache_type in ["all", "index"]: if remote: patterns.append(f"index:{remote}:*") + patterns.append(f"mutable:meta:{remote}:*") else: patterns.append("index:*") + patterns.append("mutable:meta:*") if cache_type in ["all", "metrics"]: if remote: @@ -163,13 +169,13 @@ async def construct_remote_url(remote_name: str, path: str) -> str: async def check_artifact_patterns(remote_name: str, repo_path: str, file_path: str, full_path: str) -> bool: - # First check if this is an index file - always allow index files - index_patterns = config.get_index_patterns(remote_name) - if cache.is_index_file(file_path, index_patterns) or cache.is_index_file(full_path, index_patterns): + # Mutable files (index files) are always allowed through + mutable_patterns = config.get_mutable_patterns(remote_name) + if 
cache.is_mutable_file(file_path, mutable_patterns) or cache.is_mutable_file(full_path, mutable_patterns): return True - # Then check basic include patterns - patterns = config.get_repository_patterns(remote_name, repo_path) + # Check immutable include patterns + patterns = config.get_immutable_patterns(remote_name, repo_path) if not patterns: return True # Allow all if no patterns configured @@ -183,7 +189,6 @@ async def check_artifact_patterns(remote_name: str, repo_path: str, file_path: s if not pattern_matched: return False - # All remotes now use pattern-based filtering only - no additional checks needed return True @@ -241,12 +246,80 @@ async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict: "storage_path": storage_path, "size": len(response.content), "status": "cached", + "etag": response.headers.get("ETag"), + "last_modified": response.headers.get("Last-Modified"), } except Exception as e: return {"url": url, "status": "error", "error": str(e)} +async def _upstream_reachable(url: str) -> bool: + """HEAD with a short timeout. Returns False only on network/timeout errors.""" + try: + async with httpx.AsyncClient(follow_redirects=True) as client: + await client.head(url, timeout=10.0) + return True + except (httpx.NetworkError, httpx.TimeoutException): + return False + except Exception: + return True # non-network error (e.g. too many redirects); HTTP 4xx/5xx do not raise and already returned True above + + +async def check_upstream_changed(remote_url: str, remote_name: str, path: str) -> bool: + """Conditional HEAD against upstream. Returns False only on a definitive 304. 
+ Raises UpstreamUnreachable if the backend cannot be contacted.""" + meta = cache.get_mutable_meta(remote_name, path) + if not meta: + return True + + headers = {} + if meta.get("etag"): + headers["If-None-Match"] = meta["etag"] + if meta.get("last_modified"): + headers["If-Modified-Since"] = meta["last_modified"] + if not headers: + return True + + try: + async with httpx.AsyncClient(follow_redirects=True) as client: + response = await client.head(remote_url, headers=headers) + return response.status_code != 304 + except (httpx.NetworkError, httpx.TimeoutException) as exc: + raise UpstreamUnreachable(str(exc)) from exc + + +async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) -> bool: + """Handle an expired mutable file. Returns True if the cached copy is still valid.""" + mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600) + + remote_cfg = config.get_remote_config(remote_name) or {} + check_updates = remote_cfg.get("check_mutable_updates", False) + user_mutable = check_updates and cache.is_mutable_file(path, config.get_user_mutable_patterns(remote_name)) + + if user_mutable: + try: + changed = await check_upstream_changed(remote_url, remote_name, path) + except UpstreamUnreachable: + cache.mark_index_cached(remote_name, path, mutable_ttl) + logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)") + return True + if not changed: + cache.mark_index_cached(remote_name, path, mutable_ttl) + logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)") + return True + logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading") + else: + if not await _upstream_reachable(remote_url): + cache.mark_index_cached(remote_name, path, mutable_ttl) + logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)") + return True + logger.info(f"Mutable file EXPIRED: {remote_name}/{path} 
- removing from cache") + + cache.cleanup_expired_index(storage, remote_name, path) + return False + + @app.get("/api/v1/remote/{remote_name}/{path:path}") async def get_artifact(remote_name: str, path: str): # Check if remote is configured @@ -297,17 +370,14 @@ async def get_artifact(remote_name: str, path: str): if not storage.exists(cached_key): cached_key = None - # For index files, check Redis TTL validity + # For mutable files, check Redis TTL validity filename = os.path.basename(path) - is_index = cache.is_index_file(path, config.get_index_patterns(remote_name)) + is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name)) - if cached_key and is_index: - # Index file exists, but check if it's still valid + if cached_key and is_mutable: if not cache.is_index_valid(remote_name, path): - # Index has expired, remove it from S3 - logger.info(f"Index EXPIRED: {remote_name}/{path} - removing from cache") - cache.cleanup_expired_index(storage, remote_name, path) - cached_key = None # Force re-download + if not await handle_expired_mutable(remote_name, path, remote_url): + cached_key = None if cached_key: # Return cached artifact @@ -359,13 +429,14 @@ async def get_artifact(remote_name: str, path: str): logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}") raise HTTPException(status_code=502, detail=f"Failed to fetch artifact: {result['error']}") - # Mark index files as cached in Redis if this was a new download - if result["status"] == "cached" and is_index: - # Get TTL from remote config + # Mark mutable files as cached in Redis with TTL + if result["status"] == "cached" and is_mutable: cache_config = config.get_cache_config(remote_name) - index_ttl = cache_config.get("index_ttl", 300) # Default 5 minutes - cache.mark_index_cached(remote_name, path, index_ttl) - logger.info(f"Index file cached with TTL: {remote_name}/{path} (ttl: {index_ttl}s)") + mutable_ttl = cache_config.get("mutable_ttl", 3600) + 
cache.mark_index_cached(remote_name, path, mutable_ttl) + logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)") + if result.get("etag") or result.get("last_modified"): + cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified")) # Now return the cached artifact try: @@ -424,8 +495,8 @@ async def docker_v2_proxy(request: Request, remote_name: str, path: str): if remote_config.get("package") != "docker": raise HTTPException(status_code=400, detail=f"Remote '{remote_name}' is not a docker remote") - # Check include_patterns against the image name (e.g. "library/nginx") - patterns = config.get_repository_patterns(remote_name, "") + # Check immutable_patterns against the image name (e.g. "library/nginx") + patterns = config.get_immutable_patterns(remote_name, "") if patterns: path_parts = path.split("/") image_name = "/".join(path_parts[:2]) if len(path_parts) >= 2 else path @@ -439,24 +510,25 @@ async def docker_v2_proxy(request: Request, remote_name: str, path: str): if not storage.exists(cached_key): cached_key = None - is_index = cache.is_index_file(path, config.get_index_patterns(remote_name)) + is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name)) - if cached_key and is_index: + if cached_key and is_mutable: if not cache.is_index_valid(remote_name, path): - logger.info(f"Index EXPIRED: {remote_name}/{path} - removing from cache") - cache.cleanup_expired_index(storage, remote_name, path) - cached_key = None + if not await handle_expired_mutable(remote_name, path, remote_url): + cached_key = None if not cached_key: logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}") result = await cache_single_artifact(remote_url, remote_name, path) if result["status"] == "error": raise HTTPException(status_code=502, detail=f"Failed to fetch: {result['error']}") - if result["status"] == "cached" and is_index: + if result["status"] == "cached" and 
is_mutable: cache_config = config.get_cache_config(remote_name) - index_ttl = cache_config.get("index_ttl", 300) - cache.mark_index_cached(remote_name, path, index_ttl) - logger.info(f"Index file cached with TTL: {remote_name}/{path} (ttl: {index_ttl}s)") + mutable_ttl = cache_config.get("mutable_ttl", 3600) + cache.mark_index_cached(remote_name, path, mutable_ttl) + logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)") + if result.get("etag") or result.get("last_modified"): + cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified")) artifact_data = storage.download_object(storage.get_object_key(remote_name, path)) diff --git a/tests/conftest.py b/tests/conftest.py index a3b2a26..9326244 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,47 +22,55 @@ TEST_REMOTES = { "base_url": "https://dl-cdn.alpinelinux.org", "type": "remote", "package": "alpine", - "include_patterns": [".*/x86_64/.*\\.apk$"], - "cache": {"file_ttl": 0, "index_ttl": 3600}, + "immutable_patterns": [".*/x86_64/.*\\.apk$"], + "cache": {"immutable_ttl": 0, "mutable_ttl": 3600}, }, "rpm-test": { "base_url": "https://example.com/rpm", "type": "remote", "package": "rpm", - "include_patterns": [".*/x86_64/.*\\.rpm$", ".*/repodata/.*$"], - "cache": {"file_ttl": 0, "index_ttl": 3600}, + "immutable_patterns": [".*/x86_64/.*\\.rpm$", ".*/repodata/.*$"], + "cache": {"immutable_ttl": 0, "mutable_ttl": 3600}, }, "docker-test": { "base_url": "https://registry.example.com", "type": "remote", "package": "docker", - "cache": {"file_ttl": 0, "index_ttl": 300}, + "cache": {"immutable_ttl": 0, "mutable_ttl": 300}, }, "docker-restricted": { "base_url": "https://registry.example.com", "type": "remote", "package": "docker", - "include_patterns": ["^library/nginx"], - "cache": {"file_ttl": 0, "index_ttl": 300}, + "immutable_patterns": ["^library/nginx"], + "cache": {"immutable_ttl": 0, "mutable_ttl": 300}, }, "generic-test": { "base_url": 
"https://releases.example.com", "type": "remote", "package": "generic", - "include_patterns": [".*\\.tar\\.gz$"], - "cache": {"file_ttl": 0, "index_ttl": 0}, + "immutable_patterns": [".*\\.tar\\.gz$"], + "cache": {"immutable_ttl": 0, "mutable_ttl": 0}, }, "custom-index-test": { "base_url": "https://example.com", "type": "remote", "package": "generic", - "index_patterns": ["metadata\\.json$"], - "cache": {"file_ttl": 0, "index_ttl": 600}, + "mutable_patterns": ["metadata\\.json$"], + "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, + }, + "check-mutable-test": { + "base_url": "https://example.com", + "type": "remote", + "package": "generic", + "mutable_patterns": ["metadata\\.json$"], + "check_mutable_updates": True, + "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, }, "local-test": { "type": "local", "package": "generic", - "cache": {"file_ttl": 0, "index_ttl": 0}, + "cache": {"immutable_ttl": 0, "mutable_ttl": 0}, }, } } diff --git a/tests/test_cache.py b/tests/test_cache.py index 8af80bb..8308e0d 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1,4 +1,4 @@ -"""Tests for RedisCache, focusing on is_index_file with configurable patterns.""" +"""Tests for RedisCache, focusing on is_mutable_file with configurable patterns.""" import hashlib from unittest.mock import ANY, MagicMock, patch @@ -6,7 +6,7 @@ from unittest.mock import ANY, MagicMock, patch import pytest from artifactapi.cache import RedisCache -from artifactapi.config import _PACKAGE_INDEX_PATTERNS +from artifactapi.config import _PACKAGE_MUTABLE_PATTERNS @pytest.fixture @@ -38,139 +38,139 @@ def cache_with_redis(mock_redis_client): # --------------------------------------------------------------------------- -# is_index_file — alpine patterns +# is_mutable_file — alpine patterns # --------------------------------------------------------------------------- -class TestIsIndexFileAlpine: +class TestIsMutableFileAlpine: def test_apkindex_tarball_is_index(self, bare_cache): - patterns = 
_PACKAGE_INDEX_PATTERNS["alpine"] - assert bare_cache.is_index_file("alpine/v3.18/x86_64/APKINDEX.tar.gz", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] + assert bare_cache.is_mutable_file("alpine/v3.18/x86_64/APKINDEX.tar.gz", patterns) def test_nested_apkindex_is_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["alpine"] - assert bare_cache.is_index_file("mirrors/dl-cdn/alpine/v3.19/community/x86_64/APKINDEX.tar.gz", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] + assert bare_cache.is_mutable_file("mirrors/dl-cdn/alpine/v3.19/community/x86_64/APKINDEX.tar.gz", patterns) def test_apk_package_is_not_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["alpine"] - assert not bare_cache.is_index_file("alpine/v3.18/x86_64/musl-1.2.4-r2.apk", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] + assert not bare_cache.is_mutable_file("alpine/v3.18/x86_64/musl-1.2.4-r2.apk", patterns) def test_random_tarball_is_not_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["alpine"] - assert not bare_cache.is_index_file("some/path/archive.tar.gz", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] + assert not bare_cache.is_mutable_file("some/path/archive.tar.gz", patterns) def test_apkindex_signature_file_is_not_index(self, bare_cache): # Signature file adjacent to the index should not be treated as an index - patterns = _PACKAGE_INDEX_PATTERNS["alpine"] - assert not bare_cache.is_index_file("alpine/v3.18/x86_64/APKINDEX.tar.gz.sig", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] + assert not bare_cache.is_mutable_file("alpine/v3.18/x86_64/APKINDEX.tar.gz.sig", patterns) def test_apkindex_tmp_file_is_not_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["alpine"] - assert not bare_cache.is_index_file("alpine/v3.18/x86_64/APKINDEX.tar.gz.tmp", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] + assert not bare_cache.is_mutable_file("alpine/v3.18/x86_64/APKINDEX.tar.gz.tmp", 
patterns) # --------------------------------------------------------------------------- -# is_index_file — rpm patterns +# is_mutable_file — rpm patterns # --------------------------------------------------------------------------- -class TestIsIndexFileRpm: +class TestIsMutableFileRpm: def test_repomd_xml_is_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["rpm"] - assert bare_cache.is_index_file("almalinux/9/x86_64/repomd.xml", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] + assert bare_cache.is_mutable_file("almalinux/9/x86_64/repomd.xml", patterns) def test_repodata_primary_xml_gz_is_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["rpm"] - assert bare_cache.is_index_file("repo/repodata/primary.xml.gz", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] + assert bare_cache.is_mutable_file("repo/repodata/primary.xml.gz", patterns) def test_repodata_sqlite_is_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["rpm"] - assert bare_cache.is_index_file("repo/repodata/primary.sqlite", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] + assert bare_cache.is_mutable_file("repo/repodata/primary.sqlite", patterns) def test_repodata_sqlite_bz2_is_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["rpm"] - assert bare_cache.is_index_file("repo/repodata/other.sqlite.bz2", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] + assert bare_cache.is_mutable_file("repo/repodata/other.sqlite.bz2", patterns) def test_repodata_yaml_xz_is_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["rpm"] - assert bare_cache.is_index_file("repo/repodata/comps.yaml.xz", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] + assert bare_cache.is_mutable_file("repo/repodata/comps.yaml.xz", patterns) def test_packages_gz_pattern_matches_any_path(self, bare_cache): # The Packages.gz$ regex is a carryover from the original hardcoded logic and # deliberately matches any path ending in Packages.gz — including 
Debian-style paths. # This test documents that intentional behaviour. - patterns = _PACKAGE_INDEX_PATTERNS["rpm"] - assert bare_cache.is_index_file("debian/dists/stable/main/binary-amd64/Packages.gz", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] + assert bare_cache.is_mutable_file("debian/dists/stable/main/binary-amd64/Packages.gz", patterns) def test_rpm_package_is_not_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["rpm"] - assert not bare_cache.is_index_file("almalinux/9/x86_64/Packages/bash-5.1.8.x86_64.rpm", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] + assert not bare_cache.is_mutable_file("almalinux/9/x86_64/Packages/bash-5.1.8.x86_64.rpm", patterns) def test_arbitrary_xml_outside_repodata_is_not_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["rpm"] - assert not bare_cache.is_index_file("some/path/config.xml", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] + assert not bare_cache.is_mutable_file("some/path/config.xml", patterns) # --------------------------------------------------------------------------- -# is_index_file — docker patterns +# is_mutable_file — docker patterns # --------------------------------------------------------------------------- -class TestIsIndexFileDocker: +class TestIsMutableFileDocker: def test_tag_manifest_is_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["docker"] - assert bare_cache.is_index_file("library/nginx/manifests/latest", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] + assert bare_cache.is_mutable_file("library/nginx/manifests/latest", patterns) def test_version_tag_manifest_is_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["docker"] - assert bare_cache.is_index_file("library/nginx/manifests/1.25.3", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] + assert bare_cache.is_mutable_file("library/nginx/manifests/1.25.3", patterns) def test_hyphenated_tag_manifest_is_index(self, bare_cache): - patterns = 
_PACKAGE_INDEX_PATTERNS["docker"] - assert bare_cache.is_index_file("library/nginx/manifests/latest-rc", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] + assert bare_cache.is_mutable_file("library/nginx/manifests/latest-rc", patterns) def test_numeric_date_tag_manifest_is_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["docker"] - assert bare_cache.is_index_file("library/nginx/manifests/20240101", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] + assert bare_cache.is_mutable_file("library/nginx/manifests/20240101", patterns) def test_digest_manifest_is_not_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["docker"] + patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] digest = "sha256:" + "a" * 64 - assert not bare_cache.is_index_file(f"library/nginx/manifests/{digest}", patterns) + assert not bare_cache.is_mutable_file(f"library/nginx/manifests/{digest}", patterns) def test_tags_list_is_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["docker"] - assert bare_cache.is_index_file("library/nginx/tags/list", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] + assert bare_cache.is_mutable_file("library/nginx/tags/list", patterns) def test_blob_is_not_index(self, bare_cache): - patterns = _PACKAGE_INDEX_PATTERNS["docker"] - assert not bare_cache.is_index_file("library/nginx/blobs/sha256:abc123", patterns) + patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] + assert not bare_cache.is_mutable_file("library/nginx/blobs/sha256:abc123", patterns) # --------------------------------------------------------------------------- -# is_index_file — edge cases +# is_mutable_file — edge cases # --------------------------------------------------------------------------- -class TestIsIndexFileEdgeCases: +class TestIsMutableFileEdgeCases: def test_empty_patterns_nothing_is_index(self, bare_cache): - assert not bare_cache.is_index_file("APKINDEX.tar.gz", []) - assert not bare_cache.is_index_file("repomd.xml", []) - assert not 
bare_cache.is_index_file("library/nginx/manifests/latest", []) + assert not bare_cache.is_mutable_file("APKINDEX.tar.gz", []) + assert not bare_cache.is_mutable_file("repomd.xml", []) + assert not bare_cache.is_mutable_file("library/nginx/manifests/latest", []) def test_none_patterns_nothing_is_index(self, bare_cache): - assert not bare_cache.is_index_file("APKINDEX.tar.gz", None) - assert not bare_cache.is_index_file("repomd.xml", None) + assert not bare_cache.is_mutable_file("APKINDEX.tar.gz", None) + assert not bare_cache.is_mutable_file("repomd.xml", None) def test_custom_patterns_match(self, bare_cache): patterns = [r"metadata\.json$", r"index\.yaml$"] - assert bare_cache.is_index_file("repo/metadata.json", patterns) - assert bare_cache.is_index_file("repo/subdir/index.yaml", patterns) - assert not bare_cache.is_index_file("repo/data.tar.gz", patterns) + assert bare_cache.is_mutable_file("repo/metadata.json", patterns) + assert bare_cache.is_mutable_file("repo/subdir/index.yaml", patterns) + assert not bare_cache.is_mutable_file("repo/data.tar.gz", patterns) def test_custom_pattern_does_not_match_standard_index(self, bare_cache): patterns = [r"metadata\.json$"] - assert not bare_cache.is_index_file("APKINDEX.tar.gz", patterns) + assert not bare_cache.is_mutable_file("APKINDEX.tar.gz", patterns) # --------------------------------------------------------------------------- @@ -235,3 +235,51 @@ class TestIndexValidity: # client is None when Redis is unavailable — setex cannot be called assert unavailable_cache.client is None unavailable_cache.mark_index_cached("remote", "some/path", 300) # must not raise + + +# --------------------------------------------------------------------------- +# mutable meta (ETag / Last-Modified storage) +# --------------------------------------------------------------------------- + + +class TestMutableMeta: + def test_meta_key_format(self, bare_cache): + path = "repo/metadata.json" + expected_hash = 
hashlib.sha256(path.encode()).hexdigest()[:16] + assert bare_cache.get_mutable_meta_key("myremote", path) == f"mutable:meta:myremote:{expected_hash}" + + def test_meta_key_hash_is_16_chars(self, bare_cache): + key = bare_cache.get_mutable_meta_key("remote", "some/path/file.json") + assert len(key.split(":")[-1]) == 16 + + def test_store_and_retrieve_etag(self, cache_with_redis, mock_redis_client): + mock_redis_client.hgetall.return_value = {"etag": '"abc123"'} + cache_with_redis.store_mutable_meta("remote", "path/meta.json", '"abc123"', None) + mock_redis_client.hset.assert_called_once() + meta = cache_with_redis.get_mutable_meta("remote", "path/meta.json") + assert meta["etag"] == '"abc123"' + + def test_store_and_retrieve_last_modified(self, cache_with_redis, mock_redis_client): + lm = "Mon, 01 Jan 2024 00:00:00 GMT" + mock_redis_client.hgetall.return_value = {"last_modified": lm} + cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, lm) + meta = cache_with_redis.get_mutable_meta("remote", "path/meta.json") + assert meta["last_modified"] == lm + + def test_store_no_op_when_both_none(self, cache_with_redis, mock_redis_client): + cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, None) + mock_redis_client.hset.assert_not_called() + + def test_store_no_op_when_unavailable(self, unavailable_cache): + unavailable_cache.store_mutable_meta("remote", "path", "etag", None) # must not raise + + def test_get_returns_empty_when_unavailable(self, unavailable_cache): + assert unavailable_cache.get_mutable_meta("remote", "path") == {} + + def test_delete_removes_meta_key(self, cache_with_redis, mock_redis_client): + expected_key = cache_with_redis.get_mutable_meta_key("remote", "path/meta.json") + cache_with_redis.delete_mutable_meta("remote", "path/meta.json") + mock_redis_client.delete.assert_called_once_with(expected_key) + + def test_delete_no_op_when_unavailable(self, unavailable_cache): + unavailable_cache.delete_mutable_meta("remote", 
"path") # must not raise diff --git a/tests/test_config.py b/tests/test_config.py index 3e3a240..9137840 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,4 +1,4 @@ -"""Tests for ConfigManager, focusing on get_index_patterns (new logic).""" +"""Tests for ConfigManager, focusing on get_mutable_patterns and get_immutable_patterns.""" import os @@ -21,45 +21,44 @@ def make_config(tmp_path): # --------------------------------------------------------------------------- -# get_index_patterns +# get_mutable_patterns # --------------------------------------------------------------------------- -class TestGetIndexPatterns: +class TestGetMutablePatterns: def test_alpine_returns_package_defaults(self, make_config): cfg = make_config({"r": {"type": "remote", "package": "alpine", "base_url": "https://x.com"}}) - patterns = cfg.get_index_patterns("r") - # Assert against literal strings, not the live constant, so a rename doesn't mask a regression + patterns = cfg.get_mutable_patterns("r") assert r"APKINDEX\.tar\.gz$" in patterns def test_rpm_returns_package_defaults(self, make_config): cfg = make_config({"r": {"type": "remote", "package": "rpm", "base_url": "https://x.com"}}) - patterns = cfg.get_index_patterns("r") + patterns = cfg.get_mutable_patterns("r") assert r"repomd\.xml$" in patterns assert any("repodata" in p for p in patterns) def test_docker_returns_package_defaults(self, make_config): cfg = make_config({"r": {"type": "remote", "package": "docker", "base_url": "https://x.com"}}) - patterns = cfg.get_index_patterns("r") + patterns = cfg.get_mutable_patterns("r") assert any("manifests" in p for p in patterns) assert any("tags/list" in p for p in patterns) def test_generic_returns_empty_list(self, make_config): cfg = make_config({"r": {"type": "remote", "package": "generic", "base_url": "https://x.com"}}) - assert cfg.get_index_patterns("r") == [] + assert cfg.get_mutable_patterns("r") == [] def test_unknown_remote_returns_empty_list(self, make_config): 
cfg = make_config({}) - assert cfg.get_index_patterns("nonexistent") == [] + assert cfg.get_mutable_patterns("nonexistent") == [] def test_missing_package_field_defaults_to_generic(self, make_config): cfg = make_config({"r": {"type": "remote", "base_url": "https://x.com"}}) - assert cfg.get_index_patterns("r") == [] + assert cfg.get_mutable_patterns("r") == [] def test_unknown_package_type_returns_empty_list(self, make_config): # A mis-spelled package type silently returns [] — this is a known footgun cfg = make_config({"r": {"type": "remote", "package": "deb", "base_url": "https://x.com"}}) - assert cfg.get_index_patterns("r") == [] + assert cfg.get_mutable_patterns("r") == [] def test_extra_patterns_appended_after_defaults(self, make_config): cfg = make_config( @@ -68,11 +67,11 @@ class TestGetIndexPatterns: "type": "remote", "package": "alpine", "base_url": "https://x.com", - "index_patterns": [r"custom\.json$"], + "mutable_patterns": [r"custom\.json$"], } } ) - patterns = cfg.get_index_patterns("r") + patterns = cfg.get_mutable_patterns("r") assert r"APKINDEX\.tar\.gz$" in patterns assert r"custom\.json$" in patterns # Defaults come first @@ -85,11 +84,11 @@ class TestGetIndexPatterns: "type": "remote", "package": "alpine", "base_url": "https://x.com", - "index_patterns": [], + "mutable_patterns": [], } } ) - assert r"APKINDEX\.tar\.gz$" in cfg.get_index_patterns("r") + assert r"APKINDEX\.tar\.gz$" in cfg.get_mutable_patterns("r") def test_duplicate_extra_pattern_not_added_twice(self, make_config): existing = r"APKINDEX\.tar\.gz$" @@ -99,11 +98,11 @@ class TestGetIndexPatterns: "type": "remote", "package": "alpine", "base_url": "https://x.com", - "index_patterns": [existing], + "mutable_patterns": [existing], } } ) - patterns = cfg.get_index_patterns("r") + patterns = cfg.get_mutable_patterns("r") assert patterns.count(existing) == 1 def test_generic_with_only_extra_patterns(self, make_config): @@ -113,11 +112,11 @@ class TestGetIndexPatterns: "type": "remote", 
"package": "generic", "base_url": "https://x.com", - "index_patterns": [r"meta\.json$", r"index\.yaml$"], + "mutable_patterns": [r"meta\.json$", r"index\.yaml$"], } } ) - assert cfg.get_index_patterns("r") == [r"meta\.json$", r"index\.yaml$"] + assert cfg.get_mutable_patterns("r") == [r"meta\.json$", r"index\.yaml$"] def test_rpm_extra_patterns_merged(self, make_config): cfg = make_config( @@ -126,41 +125,41 @@ class TestGetIndexPatterns: "type": "remote", "package": "rpm", "base_url": "https://x.com", - "index_patterns": [r"custom-meta\.xml$"], + "mutable_patterns": [r"custom-meta\.xml$"], } } ) - patterns = cfg.get_index_patterns("r") + patterns = cfg.get_mutable_patterns("r") assert r"repomd\.xml$" in patterns assert r"custom-meta\.xml$" in patterns # --------------------------------------------------------------------------- -# get_repository_patterns +# get_immutable_patterns # --------------------------------------------------------------------------- -class TestGetRepositoryPatterns: - def test_returns_include_patterns(self, make_config): +class TestGetImmutablePatterns: + def test_returns_immutable_patterns(self, make_config): cfg = make_config( { "r": { "type": "remote", "package": "generic", "base_url": "https://x.com", - "include_patterns": [r".*\.tar\.gz$"], + "immutable_patterns": [r".*\.tar\.gz$"], } } ) - assert cfg.get_repository_patterns("r", "") == [r".*\.tar\.gz$"] + assert cfg.get_immutable_patterns("r") == [r".*\.tar\.gz$"] def test_returns_empty_for_missing_remote(self, make_config): cfg = make_config({}) - assert cfg.get_repository_patterns("nonexistent", "") == [] + assert cfg.get_immutable_patterns("nonexistent") == [] def test_returns_empty_when_no_patterns_configured(self, make_config): cfg = make_config({"r": {"type": "remote", "package": "generic", "base_url": "https://x.com"}}) - assert cfg.get_repository_patterns("r", "") == [] + assert cfg.get_immutable_patterns("r") == [] def test_multiple_patterns_returned(self, make_config): 
patterns = [r".*\.rpm$", r".*/repodata/.*$"] @@ -170,11 +169,11 @@ class TestGetRepositoryPatterns: "type": "remote", "package": "rpm", "base_url": "https://x.com", - "include_patterns": patterns, + "immutable_patterns": patterns, } } ) - assert cfg.get_repository_patterns("r", "") == patterns + assert cfg.get_immutable_patterns("r") == patterns def test_dict_keyed_repositories_returns_per_repo_patterns(self, make_config): cfg = make_config( @@ -183,14 +182,14 @@ class TestGetRepositoryPatterns: "type": "remote", "package": "generic", "base_url": "https://x.com", - "include_patterns": [r".*\.tar\.gz$"], + "immutable_patterns": [r".*\.tar\.gz$"], "repositories": { - "/path/to/repo": {"include_patterns": [r".*\.rpm$"]}, + "/path/to/repo": {"immutable_patterns": [r".*\.rpm$"]}, }, } } ) - assert cfg.get_repository_patterns("r", "/path/to/repo") == [r".*\.rpm$"] + assert cfg.get_immutable_patterns("r", "/path/to/repo") == [r".*\.rpm$"] def test_dict_keyed_repositories_falls_back_to_remote_patterns(self, make_config): cfg = make_config( @@ -199,14 +198,47 @@ class TestGetRepositoryPatterns: "type": "remote", "package": "generic", "base_url": "https://x.com", - "include_patterns": [r".*\.tar\.gz$"], + "immutable_patterns": [r".*\.tar\.gz$"], "repositories": { - "/path/to/repo": {"include_patterns": [r".*\.rpm$"]}, + "/path/to/repo": {"immutable_patterns": [r".*\.rpm$"]}, }, } } ) - assert cfg.get_repository_patterns("r", "/unknown/path") == [r".*\.tar\.gz$"] + assert cfg.get_immutable_patterns("r", "/unknown/path") == [r".*\.tar\.gz$"] + + +# --------------------------------------------------------------------------- +# get_user_mutable_patterns +# --------------------------------------------------------------------------- + + +class TestGetUserMutablePatterns: + def test_returns_only_user_patterns(self, make_config): + cfg = make_config( + { + "r": { + "type": "remote", + "package": "alpine", + "base_url": "https://x.com", + "mutable_patterns": [r"custom\.json$"], + } + 
} + ) + assert cfg.get_user_mutable_patterns("r") == [r"custom\.json$"] + + def test_excludes_package_defaults(self, make_config): + # Package defaults (APKINDEX etc.) must NOT appear here + cfg = make_config({"r": {"type": "remote", "package": "alpine", "base_url": "https://x.com"}}) + assert cfg.get_user_mutable_patterns("r") == [] + + def test_returns_empty_for_missing_remote(self, make_config): + cfg = make_config({}) + assert cfg.get_user_mutable_patterns("nonexistent") == [] + + def test_returns_empty_when_key_absent(self, make_config): + cfg = make_config({"r": {"type": "remote", "package": "generic", "base_url": "https://x.com"}}) + assert cfg.get_user_mutable_patterns("r") == [] # --------------------------------------------------------------------------- @@ -222,11 +254,11 @@ class TestGetCacheConfig: "type": "remote", "package": "generic", "base_url": "https://x.com", - "cache": {"file_ttl": 0, "index_ttl": 7200}, + "cache": {"immutable_ttl": 0, "mutable_ttl": 7200}, } } ) - assert cfg.get_cache_config("r") == {"file_ttl": 0, "index_ttl": 7200} + assert cfg.get_cache_config("r") == {"immutable_ttl": 0, "mutable_ttl": 7200} def test_returns_empty_dict_for_missing_remote(self, make_config): cfg = make_config({}) diff --git a/tests/test_routes.py b/tests/test_routes.py index 3f5b2e5..d2dc643 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -25,7 +25,7 @@ def mock_storage(): @pytest.fixture def mock_cache(): m = MagicMock() - m.is_index_file.return_value = False + m.is_mutable_file.return_value = False m.is_index_valid.return_value = True m.available = False m.client = None @@ -123,7 +123,7 @@ class TestDockerProxy: ).encode() deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = manifest - deps["cache"].is_index_file.return_value = True + deps["cache"].is_mutable_file.return_value = True deps["cache"].is_index_valid.return_value = True response = 
client.get("/v2/docker-restricted/library/nginx/manifests/latest") @@ -140,7 +140,7 @@ class TestDockerProxy: ).encode() deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = manifest - deps["cache"].is_index_file.return_value = True + deps["cache"].is_mutable_file.return_value = True deps["cache"].is_index_valid.return_value = True response = client.get("/v2/docker-test/library/nginx/manifests/latest") @@ -158,7 +158,7 @@ class TestDockerProxy: ).encode() deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = manifest - deps["cache"].is_index_file.return_value = True + deps["cache"].is_mutable_file.return_value = True deps["cache"].is_index_valid.return_value = True response = client.get("/v2/docker-test/library/nginx/manifests/latest") @@ -170,7 +170,7 @@ class TestDockerProxy: manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = manifest - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False client.get("/v2/docker-test/library/nginx/manifests/latest") deps["metrics"].record_cache_hit.assert_called_once_with("docker-test", ANY) @@ -185,7 +185,7 @@ class TestDockerProxy: ).encode() deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = manifest - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False response = client.head("/v2/docker-test/library/nginx/manifests/latest") assert response.status_code == 200 @@ -201,7 +201,7 @@ class TestDockerProxy: ).encode() deps["storage"].exists.return_value = False deps["storage"].download_object.return_value = manifest - deps["cache"].is_index_file.return_value = True + deps["cache"].is_mutable_file.return_value = True with patch( "artifactapi.main.cache_single_artifact", @@ -223,7 
+223,7 @@ class TestDockerProxy: ).encode() deps["storage"].exists.return_value = False deps["storage"].download_object.return_value = manifest - deps["cache"].is_index_file.return_value = True + deps["cache"].is_mutable_file.return_value = True with patch( "artifactapi.main.cache_single_artifact", @@ -244,16 +244,17 @@ class TestDockerProxy: } ).encode() deps["storage"].exists.return_value = True # cached in S3 - deps["cache"].is_index_file.return_value = True + deps["cache"].is_mutable_file.return_value = True deps["cache"].is_index_valid.return_value = False # but TTL expired deps["storage"].download_object.return_value = manifest - with patch( - "artifactapi.main.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - response = client.get("/v2/docker-test/library/nginx/manifests/latest") + with patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=True): + with patch( + "artifactapi.main.cache_single_artifact", + new_callable=AsyncMock, + return_value={"status": "cached"}, + ) as mock_fetch: + response = client.get("/v2/docker-test/library/nginx/manifests/latest") mock_fetch.assert_called_once() assert response.status_code == 200 @@ -278,7 +279,7 @@ class TestGenericArtifactRoute: deps = patched_deps deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = b"tar content" - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False response = client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") assert response.status_code == 200 @@ -289,7 +290,7 @@ class TestGenericArtifactRoute: deps = patched_deps deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = b"content" - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False response = client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") disposition = 
response.headers["content-disposition"] @@ -301,7 +302,7 @@ class TestGenericArtifactRoute: content = b"some artifact content bytes" deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = content - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False response = client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") assert response.headers["X-Artifact-Size"] == str(len(content)) @@ -310,7 +311,7 @@ class TestGenericArtifactRoute: deps = patched_deps deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = b"content" - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") deps["metrics"].record_cache_hit.assert_called_once_with("generic-test", ANY) @@ -319,7 +320,7 @@ class TestGenericArtifactRoute: deps = patched_deps deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = b"content" - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") deps["database"].record_artifact_mapping.assert_called_once() @@ -328,7 +329,7 @@ class TestGenericArtifactRoute: deps = patched_deps deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = b"rpm bytes" - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False response = client.get("/api/v1/remote/rpm-test/almalinux/9/x86_64/bash-5.1.8.x86_64.rpm") assert response.status_code == 200 @@ -338,7 +339,7 @@ class TestGenericArtifactRoute: deps = patched_deps deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = b"" - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False response = 
client.get("/api/v1/remote/rpm-test/repo/repodata/primary.xml") assert response.status_code == 200 @@ -348,7 +349,7 @@ class TestGenericArtifactRoute: deps = patched_deps deps["storage"].exists.return_value = False deps["storage"].download_object.return_value = b"fresh content" - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False with patch( "artifactapi.main.cache_single_artifact", @@ -365,7 +366,7 @@ class TestGenericArtifactRoute: deps = patched_deps deps["storage"].exists.return_value = False deps["storage"].download_object.return_value = b"fresh content" - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False with patch( "artifactapi.main.cache_single_artifact", @@ -380,7 +381,7 @@ class TestGenericArtifactRoute: deps = patched_deps deps["storage"].exists.return_value = False deps["storage"].download_object.return_value = b"APKINDEX content" - deps["cache"].is_index_file.return_value = True + deps["cache"].is_mutable_file.return_value = True with patch( "artifactapi.main.cache_single_artifact", @@ -395,7 +396,7 @@ class TestGenericArtifactRoute: def test_upstream_error_returns_502(self, client, patched_deps): deps = patched_deps deps["storage"].exists.return_value = False - deps["cache"].is_index_file.return_value = False + deps["cache"].is_mutable_file.return_value = False with patch( "artifactapi.main.cache_single_artifact", @@ -406,19 +407,116 @@ class TestGenericArtifactRoute: assert response.status_code == 502 - def test_index_file_bypasses_include_patterns(self, client, patched_deps): - """Index files must be served even when they don't match include_patterns.""" + def test_mutable_file_bypasses_immutable_patterns(self, client, patched_deps): + """Mutable files must be served even when they don't match immutable_patterns.""" deps = patched_deps deps["storage"].exists.return_value = True deps["storage"].download_object.return_value = b"APKINDEX content" - 
deps["cache"].is_index_file.return_value = True + deps["cache"].is_mutable_file.return_value = True deps["cache"].is_index_valid.return_value = True - # APKINDEX.tar.gz does not match alpine-test's include_patterns (.*.apk$), - # but since is_index_file returns True it must be allowed through. + # APKINDEX.tar.gz does not match alpine-test's immutable_patterns (.*.apk$), + # but since is_mutable_file returns True it must be allowed through. response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz") assert response.status_code == 200 + def test_mutable_unchanged_refreshes_ttl_without_redownload(self, client, patched_deps): + """When check_mutable_updates=True and upstream says 304, TTL is refreshed in place.""" + deps = patched_deps + deps["storage"].exists.return_value = True + deps["storage"].download_object.return_value = b"metadata content" + # File is mutable and its TTL has expired + deps["cache"].is_mutable_file.return_value = True + deps["cache"].is_index_valid.return_value = False + deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'} + + with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=False): + response = client.get("/api/v1/remote/check-mutable-test/metadata.json") + + assert response.status_code == 200 + deps["cache"].mark_index_cached.assert_called() + # S3 object must NOT have been deleted (no re-download) + deps["storage"].client.delete_object.assert_not_called() + + def test_mutable_changed_triggers_redownload(self, client, patched_deps): + """When check_mutable_updates=True and upstream says 200, cache is invalidated.""" + deps = patched_deps + deps["storage"].exists.return_value = False + deps["cache"].is_mutable_file.return_value = True + deps["cache"].is_index_valid.return_value = False + deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'} + + with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=True): + with 
patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache: + mock_cache.return_value = {"status": "error", "error": "upstream gone"} + response = client.get("/api/v1/remote/check-mutable-test/metadata.json") + + assert response.status_code == 502 + + def test_mutable_changed_redownloads_successfully(self, client, patched_deps): + """When check_mutable_updates=True and upstream says 200, fresh copy is fetched and served.""" + deps = patched_deps + deps["storage"].exists.return_value = True + deps["storage"].download_object.return_value = b"fresh metadata" + deps["cache"].is_mutable_file.return_value = True + deps["cache"].is_index_valid.return_value = False + deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'} + + with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=True): + with patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache: + mock_cache.return_value = {"status": "cached", "etag": '"def"', "last_modified": None} + response = client.get("/api/v1/remote/check-mutable-test/metadata.json") + + assert response.status_code == 200 + mock_cache.assert_called_once() + + def test_mutable_backend_unreachable_on_check_updates_keeps_stale(self, client, patched_deps): + """When check_mutable_updates=True and backend is unreachable, stale copy is kept and TTL refreshed.""" + from artifactapi.main import UpstreamUnreachable + + deps = patched_deps + deps["storage"].exists.return_value = True + deps["storage"].download_object.return_value = b"stale metadata" + deps["cache"].is_mutable_file.return_value = True + deps["cache"].is_index_valid.return_value = False + deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'} + + with patch("artifactapi.main.check_upstream_changed", side_effect=UpstreamUnreachable("connection refused")): + response = client.get("/api/v1/remote/check-mutable-test/metadata.json") + + assert response.status_code == 200 + 
deps["cache"].mark_index_cached.assert_called() + deps["storage"].client.delete_object.assert_not_called() + + def test_mutable_backend_unreachable_on_expiry_keeps_stale(self, client, patched_deps): + """When a regular mutable file expires and backend is unreachable, stale copy is kept and TTL refreshed.""" + deps = patched_deps + deps["storage"].exists.return_value = True + deps["storage"].download_object.return_value = b"stale APKINDEX" + deps["cache"].is_mutable_file.return_value = True + deps["cache"].is_index_valid.return_value = False + + with patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=False): + response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz") + + assert response.status_code == 200 + deps["cache"].mark_index_cached.assert_called() + deps["storage"].client.delete_object.assert_not_called() + + def test_mutable_flag_off_skips_conditional_check(self, client, patched_deps): + """When check_mutable_updates is not set, expired mutable files are always re-fetched.""" + deps = patched_deps + deps["storage"].exists.return_value = False + deps["cache"].is_mutable_file.return_value = True + deps["cache"].is_index_valid.return_value = False + + with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock) as mock_check: + with patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache: + mock_cache.return_value = {"status": "error", "error": "upstream gone"} + client.get("/api/v1/remote/custom-index-test/metadata.json") + + mock_check.assert_not_called() + def test_local_repo_file_not_found_returns_404(self, client, patched_deps): deps = patched_deps deps["database"].get_local_file_metadata.return_value = None @@ -519,8 +617,8 @@ class TestCacheFlushEndpoint: deps["cache"].available = True redis_mock = MagicMock() deps["cache"].client = redis_mock - # First pattern (index:*) returns keys; subsequent pattern returns nothing - 
redis_mock.keys.side_effect = [["index:test:abc", "index:test:def"], []] + # index:* returns keys; mutable:meta:* and metrics:* return nothing + redis_mock.keys.side_effect = [["index:test:abc", "index:test:def"], [], []] deps["storage"].client.list_objects_v2.return_value = {} response = client.put("/cache/flush")