feat: immutable/mutable caching patterns with conditional revalidation and stale fallback #14
@@ -6,10 +6,13 @@ A generic FastAPI-based artifact caching system that downloads and stores files
|
|||||||
|
|
||||||
- **Generic Remote Support**: Works with any HTTP-based file server (GitHub, Gitea, HashiCorp, custom servers)
|
- **Generic Remote Support**: Works with any HTTP-based file server (GitHub, Gitea, HashiCorp, custom servers)
|
||||||
- **Configuration-Based**: YAML configuration for remotes, patterns, and access control
|
- **Configuration-Based**: YAML configuration for remotes, patterns, and access control
|
||||||
- **Direct URL API**: Access cached files via clean URLs like `/api/github/owner/repo/path/file.tar.gz`
|
- **Direct URL API**: Access cached files via clean URLs like `/api/v1/remote/github/owner/repo/path/file.tar.gz`
|
||||||
- **Pattern Filtering**: Regex-based inclusion patterns for security and organization
|
- **Immutable/Mutable Pattern Model**: Per-remote regex patterns distinguish forever-cached artifacts from TTL-expiring metadata
|
||||||
- **Smart Caching**: Automatic download and cache on first access, serve from cache afterward
|
- **Smart Caching**: Automatic download and cache on first access, serve from cache afterward
|
||||||
|
- **Conditional Revalidation**: Optional `check_mutable_updates` flag — sends `If-None-Match`/`If-Modified-Since` on expiry; skips re-download on 304
|
||||||
|
- **Stale-on-Upstream-Error**: Expired mutable files are kept and their TTL refreshed when the backend cannot be reached, so cached data remains available during upstream outages
|
||||||
- **S3 Storage**: MinIO/S3 backend with predictable paths
|
- **S3 Storage**: MinIO/S3 backend with predictable paths
|
||||||
|
- **Docker Registry Proxy**: Full Docker Registry HTTP API v2 for transparent container image caching
|
||||||
- **Content-Type Detection**: Automatic MIME type detection for downloads
|
- **Content-Type Detection**: Automatic MIME type detection for downloads
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
@@ -71,15 +74,18 @@ The system uses `remotes.yaml` to define remote repositories and access patterns
|
|||||||
remotes:
|
remotes:
|
||||||
remote-name:
|
remote-name:
|
||||||
base_url: "https://example.com" # Base URL for the remote
|
base_url: "https://example.com" # Base URL for the remote
|
||||||
type: "remote" # Type: "remote" or "local"
|
type: "remote" # "remote" or "local"
|
||||||
package: "generic" # Package type: "generic", "alpine", "rpm"
|
package: "generic" # "generic", "alpine", "rpm", or "docker"
|
||||||
description: "Human readable description"
|
description: "Human readable description"
|
||||||
include_patterns: # Regex patterns for allowed files
|
immutable_patterns: # Files cached forever (release binaries, versioned tags)
|
||||||
- "pattern1"
|
- "pattern1"
|
||||||
- "pattern2"
|
- "pattern2"
|
||||||
cache: # Cache configuration (optional)
|
mutable_patterns: # Files that expire after mutable_ttl (optional)
|
||||||
file_ttl: 0 # File cache TTL (0 = indefinite)
|
- "pattern3"
|
||||||
index_ttl: 300 # Index file TTL in seconds
|
check_mutable_updates: false # Set to true to send a conditional HEAD before re-fetching (optional, default: false)
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # TTL for immutable files (0 = indefinitely)
|
||||||
|
mutable_ttl: 3600 # TTL in seconds for mutable files
|
||||||
```
|
```
|
||||||
|
|
||||||
### Remote Types
|
### Remote Types
|
||||||
@@ -94,30 +100,30 @@ remotes:
|
|||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "GitHub releases and files"
|
description: "GitHub releases and files"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
- "lxc/incus/.*\\.tar\\.gz$"
|
- "lxc/incus/.*\\.tar\\.gz$"
|
||||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0 # Cache files indefinitely
|
immutable_ttl: 0 # Cache files indefinitely
|
||||||
index_ttl: 0 # No index files for generic remotes
|
|
||||||
|
|
||||||
hashicorp-releases:
|
github-archive:
|
||||||
base_url: "https://releases.hashicorp.com"
|
base_url: "https://github.com"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "HashiCorp product releases"
|
description: "GitHub repository archive tarballs"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
- ".*/archive/refs/tags/.*\\.tar\\.gz$" # tag archives never change
|
||||||
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
mutable_patterns:
|
||||||
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
- ".*/archive/refs/heads/main\\.tar\\.gz$" # branch archives can change
|
||||||
|
check_mutable_updates: true # send If-None-Match on expiry; skip re-download on 304
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 0
|
mutable_ttl: 86400 # re-check branch archives after 1 day
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Package Repository Remotes
|
#### Package Repository Remotes
|
||||||
For Linux package repositories with index files:
|
For Linux package repositories:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
remotes:
|
remotes:
|
||||||
@@ -126,23 +132,25 @@ remotes:
|
|||||||
type: "remote"
|
type: "remote"
|
||||||
package: "alpine"
|
package: "alpine"
|
||||||
description: "Alpine Linux APK package repository"
|
description: "Alpine Linux APK package repository"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- ".*/x86_64/.*\\.apk$" # Only x86_64 packages
|
- ".*/x86_64/.*\\.apk$" # packages are immutable by content-hash
|
||||||
|
# APKINDEX.tar.gz is a package-type default mutable file — no mutable_patterns needed
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0 # Cache packages indefinitely
|
immutable_ttl: 0
|
||||||
index_ttl: 7200 # Cache APKINDEX.tar.gz for 2 hours
|
mutable_ttl: 7200 # re-fetch APKINDEX.tar.gz after 2 hours
|
||||||
|
|
||||||
almalinux:
|
almalinux:
|
||||||
base_url: "http://mirror.aarnet.edu.au/pub/almalinux"
|
base_url: "https://mirror.example.com/almalinux"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "rpm"
|
package: "rpm"
|
||||||
description: "AlmaLinux RPM package repository"
|
description: "AlmaLinux RPM package repository"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- ".*/x86_64/.*\\.rpm$"
|
- ".*/x86_64/.*\\.rpm$"
|
||||||
- ".*/noarch/.*\\.rpm$"
|
- ".*/noarch/.*\\.rpm$"
|
||||||
|
# repomd.xml and repodata/* are package-type defaults
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 7200 # Cache metadata files for 2 hours
|
mutable_ttl: 7200
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Local Repositories
|
#### Local Repositories
|
||||||
@@ -155,62 +163,45 @@ remotes:
|
|||||||
package: "generic"
|
package: "generic"
|
||||||
description: "Local generic file repository"
|
description: "Local generic file repository"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 0
|
mutable_ttl: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
### Include Patterns
|
### Immutable Patterns
|
||||||
|
|
||||||
Include patterns are regular expressions that control which files can be accessed. Patterns use Python `re.search`, so they match anywhere in the path unless anchored with `^` or `$`. Only files matching at least one pattern are served; all others return HTTP 403.
|
`immutable_patterns` are regular expressions that control which files can be accessed. Patterns use Python `re.search`, so they match anywhere in the path unless anchored with `^` or `$`. Only files matching at least one immutable pattern — or a mutable pattern, see "Mutable Patterns" below — are served; all other requests return HTTP 403.
|
||||||
|
|
||||||
|
Matched files are cached with `immutable_ttl` (default 0 = forever). Use these for versioned release artifacts that never change once published.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
# Exact project + architecture — most restrictive
|
|
||||||
- "^gruntwork-io/terragrunt/releases/download/.*/terragrunt_linux_amd64$"
|
- "^gruntwork-io/terragrunt/releases/download/.*/terragrunt_linux_amd64$"
|
||||||
|
|
||||||
# Any release asset for a project, any version
|
|
||||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
|
|
||||||
# File extension only — allow all files of a given type from any path
|
|
||||||
- ".*\\.tar\\.gz$"
|
- ".*\\.tar\\.gz$"
|
||||||
- ".*\\.rpm$"
|
|
||||||
- ".*\\.zip$"
|
|
||||||
|
|
||||||
# Architecture subtree — allow everything under x86_64/
|
|
||||||
- ".*/x86_64/.*"
|
|
||||||
|
|
||||||
# Combined: architecture + extension
|
|
||||||
- ".*/x86_64/.*\\.rpm$"
|
- ".*/x86_64/.*\\.rpm$"
|
||||||
- ".*/noarch/.*\\.rpm$"
|
- ".*/noarch/.*\\.rpm$"
|
||||||
|
|
||||||
# Docker image names (used with package: docker remotes)
|
|
||||||
- "^library/nginx" # nginx official images only
|
|
||||||
- "^rancher/" # all rancher/* images
|
|
||||||
- "^rancher/rke2-runtime" # specific image
|
|
||||||
|
|
||||||
# Repodata directories — allow all metadata for an RPM repo
|
|
||||||
- ".*/repodata/.*$"
|
- ".*/repodata/.*$"
|
||||||
```
|
```
|
||||||
|
|
||||||
**Security note**: Omitting `include_patterns` entirely allows all files from that remote. Index files (e.g. `APKINDEX.tar.gz`, `repomd.xml`, tag manifests) always bypass pattern enforcement — they are served unconditionally so clients can discover available packages.
|
**Security note**: Omitting `immutable_patterns` entirely allows all files from that remote.
|
||||||
|
|
||||||
### Index Patterns
|
### Mutable Patterns
|
||||||
|
|
||||||
Index patterns identify repository metadata files. Index files get special treatment:
|
`mutable_patterns` identify files that change over time (index files, branch archives, metadata). Mutable files:
|
||||||
- **Always served** regardless of `include_patterns`
|
- **Always served** regardless of `immutable_patterns`
|
||||||
- **Cached with `index_ttl`** instead of `file_ttl`
|
- **Cached with `mutable_ttl`** and re-fetched from upstream when the TTL expires
|
||||||
- **Automatically refreshed** when the TTL expires — the cached copy is evicted and re-fetched on next request
|
- **Kept stale** when the upstream backend is unreachable — TTL is refreshed automatically so the cached copy remains available until the backend recovers (see below)
|
||||||
|
|
||||||
Built-in defaults per package type:
|
Built-in defaults per package type (no configuration needed):
|
||||||
|
|
||||||
| Package type | Built-in index patterns |
|
| Package type | Built-in mutable patterns |
|
||||||
|---|---|
|
|---|---|
|
||||||
| `alpine` | `APKINDEX\.tar\.gz$` |
|
| `alpine` | `APKINDEX\.tar\.gz$` |
|
||||||
| `rpm` | `repomd\.xml$`, `repodata/` metadata (xml, sqlite, yaml, asc, txt variants), `Packages\.gz$` |
|
| `rpm` | `repomd\.xml$`, `repodata/` metadata (xml, sqlite, yaml, asc, txt variants), `Packages\.gz$` |
|
||||||
| `docker` | Tag manifests (non-digest refs), `/tags/list` |
|
| `docker` | Tag manifests (non-digest refs), `/tags/list` |
|
||||||
| `generic` | *(none)* |
|
| `generic` | *(none)* |
|
||||||
|
|
||||||
Use `index_patterns` to add extra patterns on top of the defaults. Duplicates are ignored automatically.
|
Use `mutable_patterns` to add extra patterns on top of the defaults. Duplicates are ignored automatically.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
remotes:
|
remotes:
|
||||||
@@ -218,60 +209,74 @@ remotes:
|
|||||||
base_url: "https://charts.example.com"
|
base_url: "https://charts.example.com"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- ".*\\.tgz$" # chart archives
|
- ".*\\.tgz$"
|
||||||
index_patterns:
|
mutable_patterns:
|
||||||
- "index\\.yaml$" # Helm repo index — re-fetched on every TTL expiry
|
- "index\\.yaml$" # Helm repo index
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 600 # re-check the index every 10 minutes
|
mutable_ttl: 600 # re-check the index every 10 minutes
|
||||||
|
|
||||||
apt-mirror:
|
apt-mirror:
|
||||||
base_url: "https://apt.example.com"
|
base_url: "https://apt.example.com"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- ".*\\.deb$"
|
- ".*\\.deb$"
|
||||||
index_patterns:
|
mutable_patterns:
|
||||||
- "InRelease$" # signed APT release file
|
- "InRelease$"
|
||||||
- "Release$" # unsigned APT release file
|
- "Release$"
|
||||||
- "Packages\\.gz$" # compressed package list
|
- "Packages\\.gz$"
|
||||||
- "Packages\\.xz$"
|
- "Packages\\.xz$"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 3600 # hourly index refresh
|
mutable_ttl: 3600
|
||||||
|
|
||||||
almalinux-with-extras:
|
|
||||||
base_url: "https://mirror.example.com/almalinux"
|
|
||||||
type: "remote"
|
|
||||||
package: "rpm" # inherits repomd.xml + repodata/* defaults
|
|
||||||
include_patterns:
|
|
||||||
- ".*/x86_64/.*\\.rpm$"
|
|
||||||
- ".*/noarch/.*\\.rpm$"
|
|
||||||
index_patterns:
|
|
||||||
- "comps\\.xml$" # optional group metadata (adds to rpm defaults)
|
|
||||||
cache:
|
|
||||||
file_ttl: 0
|
|
||||||
index_ttl: 7200
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Pattern matching uses `re.search`, so `"index\\.yaml$"` matches `/stable/index.yaml` and `/index.yaml`. Anchor with `^` to restrict to the path root.
|
### Conditional Revalidation (`check_mutable_updates`)
|
||||||
|
|
||||||
|
By default, when a mutable file's TTL expires the cached copy is evicted and the full file is re-downloaded on the next request. Setting `check_mutable_updates: true` on a remote enables a cheaper conditional check first:
|
||||||
|
|
||||||
|
1. On TTL expiry, a `HEAD` request is sent to the upstream with `If-None-Match` / `If-Modified-Since` headers (populated from the original download).
|
||||||
|
2. If the upstream replies **304 Not Modified**, the TTL is refreshed in place — no re-download, no S3 traffic.
|
||||||
|
3. If the upstream replies with any other status (normally **200**), the cached copy is evicted and re-downloaded normally.
|
||||||
|
|
||||||
|
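This only applies to user-defined `mutable_patterns`. Files matched only by package-type built-in patterns (APKINDEX, repomd.xml, Docker manifests) skip the conditional check: once their TTL expires they are evicted and re-downloaded in full, provided the upstream is reachable.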
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
remotes:
|
||||||
|
github-archive:
|
||||||
|
base_url: "https://github.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "generic"
|
||||||
|
immutable_patterns:
|
||||||
|
- ".*/archive/refs/tags/.*\\.tar\\.gz$"
|
||||||
|
mutable_patterns:
|
||||||
|
- ".*/archive/refs/heads/main\\.tar\\.gz$"
|
||||||
|
check_mutable_updates: true
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 86400
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stale-on-Upstream-Error
|
||||||
|
|
||||||
|
When a mutable file's TTL expires and the upstream backend **cannot be reached** (connection refused, DNS failure, timeout), the cached copy is **kept and its TTL refreshed** rather than evicted. This means:
|
||||||
|
|
||||||
|
- RPM repodata, Alpine indexes, branch archives, and other mutable files remain available during upstream outages.
|
||||||
|
- Clients continue to receive the last-known-good copy without errors.
|
||||||
|
- The refreshed TTL lasts one more `mutable_ttl` period; once it expires and the backend has recovered, normal revalidation or eviction resumes.
|
||||||
|
|
||||||
|
This behaviour is automatic and requires no configuration. Only network-level failures trigger it — HTTP error responses (404, 503, etc.) are treated as the backend being reachable and proceed with normal expiry.
|
||||||
|
|
||||||
### Cache Configuration
|
### Cache Configuration
|
||||||
|
|
||||||
Control how long different file types are cached:
|
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0 # Regular files (0 = cache indefinitely)
|
immutable_ttl: 0 # Immutable files (0 = cache indefinitely)
|
||||||
index_ttl: 300 # Index files like APKINDEX.tar.gz (seconds)
|
mutable_ttl: 3600 # Mutable files — TTL in seconds before re-fetch is attempted
|
||||||
```
|
```
|
||||||
|
|
||||||
**Index Files**: Repository metadata files that change frequently:
|
|
||||||
- Alpine: `APKINDEX.tar.gz`
|
|
||||||
- RPM: `repomd.xml`, `*-primary.xml.gz`, etc.
|
|
||||||
- These are automatically detected and use `index_ttl`
|
|
||||||
|
|
||||||
### Environment Variables
|
### Environment Variables
|
||||||
|
|
||||||
All runtime configuration comes from environment variables:
|
All runtime configuration comes from environment variables:
|
||||||
@@ -351,26 +356,26 @@ data:
|
|||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "GitHub releases and files"
|
description: "GitHub releases and files"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
- "lxc/incus/.*\\.tar\\.gz$"
|
- "lxc/incus/.*\\.tar\\.gz$"
|
||||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 0
|
mutable_ttl: 0
|
||||||
|
|
||||||
hashicorp-releases:
|
hashicorp-releases:
|
||||||
base_url: "https://releases.hashicorp.com"
|
base_url: "https://releases.hashicorp.com"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "HashiCorp product releases"
|
description: "HashiCorp product releases"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
||||||
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
||||||
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 0
|
mutable_ttl: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
### 3. Secret for Environment Variables
|
### 3. Secret for Environment Variables
|
||||||
@@ -778,8 +783,8 @@ remotes:
|
|||||||
username: "your-dockerhub-username"
|
username: "your-dockerhub-username"
|
||||||
password: "your-dockerhub-token" # PAT with read scope
|
password: "your-dockerhub-token" # PAT with read scope
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 300
|
mutable_ttl: 300
|
||||||
```
|
```
|
||||||
|
|
||||||
A pull of `nginx:latest` becomes `/v2/dockerhub/library/nginx/manifests/latest` on the artifact API.
|
A pull of `nginx:latest` becomes `/v2/dockerhub/library/nginx/manifests/latest` on the artifact API.
|
||||||
@@ -804,8 +809,8 @@ remotes:
|
|||||||
username: "your-github-username"
|
username: "your-github-username"
|
||||||
password: "ghp_your_github_pat" # read:packages scope required
|
password: "ghp_your_github_pat" # read:packages scope required
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 300
|
mutable_ttl: 300
|
||||||
```
|
```
|
||||||
|
|
||||||
A pull of `ghcr.io/rancher/rke2-runtime:v1.30.0-rke2r1` becomes `/v2/ghcr/rancher/rke2-runtime/manifests/v1.30.0-rke2r1`.
|
A pull of `ghcr.io/rancher/rke2-runtime:v1.30.0-rke2r1` becomes `/v2/ghcr/rancher/rke2-runtime/manifests/v1.30.0-rke2r1`.
|
||||||
@@ -844,7 +849,7 @@ Each entry needs a matching remote in `remotes.yaml` using the name from the rew
|
|||||||
|
|
||||||
#### Restricting which images are cached
|
#### Restricting which images are cached
|
||||||
|
|
||||||
Use `include_patterns` on the remote to allow only specific images through the proxy. Requests for images not matching any pattern return HTTP 403 to the node.
|
Use `immutable_patterns` on the remote to allow only specific images through the proxy. Requests for images not matching any pattern return HTTP 403 to the node.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
remotes:
|
remotes:
|
||||||
@@ -852,17 +857,17 @@ remotes:
|
|||||||
base_url: "https://registry-1.docker.io"
|
base_url: "https://registry-1.docker.io"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "docker"
|
package: "docker"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- "^library/nginx" # official nginx only
|
- "^library/nginx" # official nginx only
|
||||||
- "^library/redis" # official redis only
|
- "^library/redis" # official redis only
|
||||||
- "^rancher/" # all rancher images
|
- "^rancher/" # all rancher images
|
||||||
- "^grafana/grafana" # specific image
|
- "^grafana/grafana" # specific image
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 300
|
mutable_ttl: 300
|
||||||
```
|
```
|
||||||
|
|
||||||
Omit `include_patterns` to allow all images from that registry.
|
Omit `immutable_patterns` to allow all images from that registry.
|
||||||
|
|
||||||
#### TLS configuration
|
#### TLS configuration
|
||||||
|
|
||||||
|
|||||||
+33
-4
@@ -32,6 +32,10 @@ class ArtifactRequest(BaseModel):
|
|||||||
include_pattern: str
|
include_pattern: str
|
||||||
|
|
||||||
|
|
||||||
|
class UpstreamUnreachable(Exception):
    """Signal that no response could be obtained from the upstream backend.

    Covers transport-level failures (network errors and timeouts), as opposed
    to an HTTP error status returned by a server that did answer.
    """
|
||||||
|
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -250,8 +254,21 @@ async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
|
|||||||
return {"url": url, "status": "error", "error": str(e)}
|
return {"url": url, "status": "error", "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _upstream_reachable(url: str) -> bool:
    """Probe the upstream with a HEAD request to see whether it answers at all.

    Any HTTP response counts as reachable, including 4xx/5xx: httpx only
    raises for error statuses when ``raise_for_status()`` is called, which
    this probe never does.

    Args:
        url: Upstream URL to probe.

    Returns:
        False only when the request fails with a network or timeout error;
        True in every other case, so the caller proceeds with normal expiry.
    """
    try:
        async with httpx.AsyncClient(follow_redirects=True) as client:
            await client.head(url, timeout=10.0)
    except (httpx.NetworkError, httpx.TimeoutException):
        return False
    except Exception as exc:
        # Anything else is not a connectivity failure, so it must not trigger
        # the stale-copy fallback. Keep the best-effort "reachable" answer but
        # leave a trace instead of swallowing the error silently.
        logger.warning("Reachability probe for %s failed unexpectedly: %s", url, exc)
        return True
    return True
|
||||||
|
|
||||||
|
|
||||||
async def check_upstream_changed(remote_url: str, remote_name: str, path: str) -> bool:
|
async def check_upstream_changed(remote_url: str, remote_name: str, path: str) -> bool:
|
||||||
"""Conditional HEAD against upstream. Returns False only on a definitive 304."""
|
"""Conditional HEAD against upstream. Returns False only on a definitive 304.
|
||||||
|
Raises UpstreamUnreachable if the backend cannot be contacted."""
|
||||||
meta = cache.get_mutable_meta(remote_name, path)
|
meta = cache.get_mutable_meta(remote_name, path)
|
||||||
if not meta:
|
if not meta:
|
||||||
return True
|
return True
|
||||||
@@ -268,25 +285,37 @@ async def check_upstream_changed(remote_url: str, remote_name: str, path: str) -
|
|||||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||||
response = await client.head(remote_url, headers=headers)
|
response = await client.head(remote_url, headers=headers)
|
||||||
return response.status_code != 304
|
return response.status_code != 304
|
||||||
except Exception:
|
except (httpx.NetworkError, httpx.TimeoutException) as exc:
|
||||||
return True
|
raise UpstreamUnreachable(str(exc)) from exc
|
||||||
|
|
||||||
|
|
||||||
async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) -> bool:
    """Decide what happens to a cached mutable file whose TTL has run out.

    Args:
        remote_name: Name of the configured remote the file belongs to.
        path: Path of the file within that remote.
        remote_url: Full upstream URL of the file.

    Returns:
        True when the cached copy should keep being served: either the
        upstream could not be reached, or the conditional check reported the
        file unchanged. In both cases the TTL is refreshed. False after the
        expired entry has been cleaned up so the caller can fetch it again.
    """
    ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)

    def _serve_stale() -> bool:
        # Upstream is down: extend the TTL so the last-known-good copy stays usable.
        cache.mark_index_cached(remote_name, path, ttl)
        logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({ttl}s)")
        return True

    settings = config.get_remote_config(remote_name) or {}
    # The conditional check is opt-in and limited to user-defined mutable patterns.
    conditional = settings.get("check_mutable_updates", False) and cache.is_mutable_file(
        path, config.get_user_mutable_patterns(remote_name)
    )

    if not conditional:
        if not await _upstream_reachable(remote_url):
            return _serve_stale()
        logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")
    else:
        try:
            upstream_changed = await check_upstream_changed(remote_url, remote_name, path)
        except UpstreamUnreachable:
            return _serve_stale()
        if not upstream_changed:
            cache.mark_index_cached(remote_name, path, ttl)
            logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({ttl}s)")
            return True
        logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")

    cache.cleanup_expired_index(storage, remote_name, path)
    return False
|
||||||
|
|
||||||
|
|||||||
@@ -248,6 +248,7 @@ class TestDockerProxy:
|
|||||||
deps["cache"].is_index_valid.return_value = False # but TTL expired
|
deps["cache"].is_index_valid.return_value = False # but TTL expired
|
||||||
deps["storage"].download_object.return_value = manifest
|
deps["storage"].download_object.return_value = manifest
|
||||||
|
|
||||||
|
with patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=True):
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.main.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
@@ -469,6 +470,39 @@ class TestGenericArtifactRoute:
|
|||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
mock_cache.assert_called_once()
|
mock_cache.assert_called_once()
|
||||||
|
|
||||||
|
def test_mutable_backend_unreachable_on_check_updates_keeps_stale(self, client, patched_deps):
    """An unreachable upstream during the conditional check must leave the cached copy in place."""
    from artifactapi.main import UpstreamUnreachable

    storage_dep = patched_deps["storage"]
    cache_dep = patched_deps["cache"]

    # The object is cached, counts as mutable, and its TTL has lapsed.
    storage_dep.exists.return_value = True
    storage_dep.download_object.return_value = b"stale metadata"
    cache_dep.is_mutable_file.return_value = True
    cache_dep.is_index_valid.return_value = False
    cache_dep.get_mutable_meta.return_value = {"etag": '"abc"'}

    unreachable = UpstreamUnreachable("connection refused")
    with patch("artifactapi.main.check_upstream_changed", side_effect=unreachable):
        response = client.get("/api/v1/remote/check-mutable-test/metadata.json")

    # The stale copy is served, its TTL is refreshed, and nothing is deleted.
    assert response.status_code == 200
    cache_dep.mark_index_cached.assert_called()
    storage_dep.client.delete_object.assert_not_called()
|
||||||
|
|
||||||
|
def test_mutable_backend_unreachable_on_expiry_keeps_stale(self, client, patched_deps):
    """An expired mutable file is still served when the reachability probe reports the upstream down."""
    storage_dep = patched_deps["storage"]
    cache_dep = patched_deps["cache"]

    # The object is cached, counts as mutable, and its TTL has lapsed.
    storage_dep.exists.return_value = True
    storage_dep.download_object.return_value = b"stale APKINDEX"
    cache_dep.is_mutable_file.return_value = True
    cache_dep.is_index_valid.return_value = False

    probe = patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=False)
    with probe:
        response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz")

    # The stale copy is served, its TTL is refreshed, and nothing is deleted.
    assert response.status_code == 200
    cache_dep.mark_index_cached.assert_called()
    storage_dep.client.delete_object.assert_not_called()
|
||||||
|
|
||||||
def test_mutable_flag_off_skips_conditional_check(self, client, patched_deps):
|
def test_mutable_flag_off_skips_conditional_check(self, client, patched_deps):
|
||||||
"""When check_mutable_updates is not set, expired mutable files are always re-fetched."""
|
"""When check_mutable_updates is not set, expired mutable files are always re-fetched."""
|
||||||
deps = patched_deps
|
deps = patched_deps
|
||||||
|
|||||||
Reference in New Issue
Block a user