feat: keep stale mutables when upstream is unreachable; update README
When a mutable file's TTL expires and the upstream backend cannot be contacted (network error or timeout), the cached copy is kept and its TTL refreshed instead of being evicted. This keeps RPM repodata, Alpine indexes, branch archives, and other mutable data available during upstream outages. Adds UpstreamUnreachable exception and _upstream_reachable() helper. check_upstream_changed() now raises UpstreamUnreachable on network errors (was silently returning True). handle_expired_mutable() catches the exception on the check_mutable_updates path and calls _upstream_reachable() on the plain-expiry path. README updated to current immutable/mutable terminology and documents all new caching features.
This commit is contained in:
@@ -6,10 +6,13 @@ A generic FastAPI-based artifact caching system that downloads and stores files
|
|||||||
|
|
||||||
- **Generic Remote Support**: Works with any HTTP-based file server (GitHub, Gitea, HashiCorp, custom servers)
|
- **Generic Remote Support**: Works with any HTTP-based file server (GitHub, Gitea, HashiCorp, custom servers)
|
||||||
- **Configuration-Based**: YAML configuration for remotes, patterns, and access control
|
- **Configuration-Based**: YAML configuration for remotes, patterns, and access control
|
||||||
- **Direct URL API**: Access cached files via clean URLs like `/api/github/owner/repo/path/file.tar.gz`
|
- **Direct URL API**: Access cached files via clean URLs like `/api/v1/remote/github/owner/repo/path/file.tar.gz`
|
||||||
- **Pattern Filtering**: Regex-based inclusion patterns for security and organization
|
- **Immutable/Mutable Pattern Model**: Per-remote regex patterns distinguish forever-cached artifacts from TTL-expiring metadata
|
||||||
- **Smart Caching**: Automatic download and cache on first access, serve from cache afterward
|
- **Smart Caching**: Automatic download and cache on first access, serve from cache afterward
|
||||||
|
- **Conditional Revalidation**: Optional `check_mutable_updates` flag — sends `If-None-Match`/`If-Modified-Since` on expiry; skips re-download on 304
|
||||||
|
- **Stale-on-Upstream-Error**: Expired mutable files are kept and their TTL refreshed when the backend cannot be reached, so cached data remains available during upstream outages
|
||||||
- **S3 Storage**: MinIO/S3 backend with predictable paths
|
- **S3 Storage**: MinIO/S3 backend with predictable paths
|
||||||
|
- **Docker Registry Proxy**: Full Docker Registry HTTP API v2 for transparent container image caching
|
||||||
- **Content-Type Detection**: Automatic MIME type detection for downloads
|
- **Content-Type Detection**: Automatic MIME type detection for downloads
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
@@ -71,15 +74,18 @@ The system uses `remotes.yaml` to define remote repositories and access patterns
|
|||||||
remotes:
|
remotes:
|
||||||
remote-name:
|
remote-name:
|
||||||
base_url: "https://example.com" # Base URL for the remote
|
base_url: "https://example.com" # Base URL for the remote
|
||||||
type: "remote" # Type: "remote" or "local"
|
type: "remote" # "remote" or "local"
|
||||||
package: "generic" # Package type: "generic", "alpine", "rpm"
|
package: "generic" # "generic", "alpine", "rpm", or "docker"
|
||||||
description: "Human readable description"
|
description: "Human readable description"
|
||||||
include_patterns: # Regex patterns for allowed files
|
immutable_patterns: # Files cached forever (release binaries, versioned tags)
|
||||||
- "pattern1"
|
- "pattern1"
|
||||||
- "pattern2"
|
- "pattern2"
|
||||||
cache: # Cache configuration (optional)
|
mutable_patterns: # Files that expire after mutable_ttl (optional)
|
||||||
file_ttl: 0 # File cache TTL (0 = indefinite)
|
- "pattern3"
|
||||||
index_ttl: 300 # Index file TTL in seconds
|
check_mutable_updates: false # Set to true to send a conditional HEAD before re-fetching (optional)
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # TTL for immutable files (0 = cache indefinitely)
|
||||||
|
mutable_ttl: 3600 # TTL in seconds for mutable files
|
||||||
```
|
```
|
||||||
|
|
||||||
### Remote Types
|
### Remote Types
|
||||||
@@ -94,30 +100,30 @@ remotes:
|
|||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "GitHub releases and files"
|
description: "GitHub releases and files"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
- "lxc/incus/.*\\.tar\\.gz$"
|
- "lxc/incus/.*\\.tar\\.gz$"
|
||||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0 # Cache files indefinitely
|
immutable_ttl: 0 # Cache files indefinitely
|
||||||
index_ttl: 0 # No index files for generic remotes
|
|
||||||
|
|
||||||
hashicorp-releases:
|
github-archive:
|
||||||
base_url: "https://releases.hashicorp.com"
|
base_url: "https://github.com"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "HashiCorp product releases"
|
description: "GitHub repository archive tarballs"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
- ".*/archive/refs/tags/.*\\.tar\\.gz$" # tag archives never change
|
||||||
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
mutable_patterns:
|
||||||
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
- ".*/archive/refs/heads/main\\.tar\\.gz$" # branch archives can change
|
||||||
|
check_mutable_updates: true # send If-None-Match on expiry; skip re-download on 304
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 0
|
mutable_ttl: 86400 # re-check branch archives after 1 day
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Package Repository Remotes
|
#### Package Repository Remotes
|
||||||
For Linux package repositories with index files:
|
For Linux package repositories:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
remotes:
|
remotes:
|
||||||
@@ -126,23 +132,25 @@ remotes:
|
|||||||
type: "remote"
|
type: "remote"
|
||||||
package: "alpine"
|
package: "alpine"
|
||||||
description: "Alpine Linux APK package repository"
|
description: "Alpine Linux APK package repository"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- ".*/x86_64/.*\\.apk$" # Only x86_64 packages
|
- ".*/x86_64/.*\\.apk$" # packages are immutable by content-hash
|
||||||
|
# APKINDEX.tar.gz is a package-type default mutable file — no mutable_patterns needed
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0 # Cache packages indefinitely
|
immutable_ttl: 0
|
||||||
index_ttl: 7200 # Cache APKINDEX.tar.gz for 2 hours
|
mutable_ttl: 7200 # re-fetch APKINDEX.tar.gz after 2 hours
|
||||||
|
|
||||||
almalinux:
|
almalinux:
|
||||||
base_url: "http://mirror.aarnet.edu.au/pub/almalinux"
|
base_url: "https://mirror.example.com/almalinux"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "rpm"
|
package: "rpm"
|
||||||
description: "AlmaLinux RPM package repository"
|
description: "AlmaLinux RPM package repository"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- ".*/x86_64/.*\\.rpm$"
|
- ".*/x86_64/.*\\.rpm$"
|
||||||
- ".*/noarch/.*\\.rpm$"
|
- ".*/noarch/.*\\.rpm$"
|
||||||
|
# repomd.xml and repodata/* are package-type defaults
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 7200 # Cache metadata files for 2 hours
|
mutable_ttl: 7200
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Local Repositories
|
#### Local Repositories
|
||||||
@@ -155,62 +163,45 @@ remotes:
|
|||||||
package: "generic"
|
package: "generic"
|
||||||
description: "Local generic file repository"
|
description: "Local generic file repository"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 0
|
mutable_ttl: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
### Include Patterns
|
### Immutable Patterns
|
||||||
|
|
||||||
Include patterns are regular expressions that control which files can be accessed. Patterns use Python `re.search`, so they match anywhere in the path unless anchored with `^` or `$`. Only files matching at least one pattern are served; all others return HTTP 403.
|
`immutable_patterns` are regular expressions that control which files can be accessed. Patterns use Python `re.search`, so they match anywhere in the path unless anchored with `^` or `$`. Only files matching at least one immutable pattern (or a mutable pattern, which is always served) are available; all other requests return HTTP 403.
|
||||||
|
|
||||||
|
Matched files are cached with `immutable_ttl` (default 0 = forever). Use these for versioned release artifacts that never change once published.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
# Exact project + architecture — most restrictive
|
|
||||||
- "^gruntwork-io/terragrunt/releases/download/.*/terragrunt_linux_amd64$"
|
- "^gruntwork-io/terragrunt/releases/download/.*/terragrunt_linux_amd64$"
|
||||||
|
|
||||||
# Any release asset for a project, any version
|
|
||||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
|
|
||||||
# File extension only — allow all files of a given type from any path
|
|
||||||
- ".*\\.tar\\.gz$"
|
- ".*\\.tar\\.gz$"
|
||||||
- ".*\\.rpm$"
|
|
||||||
- ".*\\.zip$"
|
|
||||||
|
|
||||||
# Architecture subtree — allow everything under x86_64/
|
|
||||||
- ".*/x86_64/.*"
|
|
||||||
|
|
||||||
# Combined: architecture + extension
|
|
||||||
- ".*/x86_64/.*\\.rpm$"
|
- ".*/x86_64/.*\\.rpm$"
|
||||||
- ".*/noarch/.*\\.rpm$"
|
- ".*/noarch/.*\\.rpm$"
|
||||||
|
|
||||||
# Docker image names (used with package: docker remotes)
|
|
||||||
- "^library/nginx" # nginx official images only
|
|
||||||
- "^rancher/" # all rancher/* images
|
|
||||||
- "^rancher/rke2-runtime" # specific image
|
|
||||||
|
|
||||||
# Repodata directories — allow all metadata for an RPM repo
|
|
||||||
- ".*/repodata/.*$"
|
- ".*/repodata/.*$"
|
||||||
```
|
```
|
||||||
|
|
||||||
**Security note**: Omitting `include_patterns` entirely allows all files from that remote. Index files (e.g. `APKINDEX.tar.gz`, `repomd.xml`, tag manifests) always bypass pattern enforcement — they are served unconditionally so clients can discover available packages.
|
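**Security note**: Omitting `immutable_patterns` entirely allows all files from that remote. Files matching a mutable pattern (including package-type defaults such as `APKINDEX.tar.gz` and `repomd.xml`) are always served regardless of `immutable_patterns`, so clients can discover available packages.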
|
||||||
|
|
||||||
### Index Patterns
|
### Mutable Patterns
|
||||||
|
|
||||||
Index patterns identify repository metadata files. Index files get special treatment:
|
`mutable_patterns` identify files that change over time (index files, branch archives, metadata). Mutable files are:
|
||||||
- **Always served** regardless of `include_patterns`
|
- **Always served** regardless of `immutable_patterns`
|
||||||
- **Cached with `index_ttl`** instead of `file_ttl`
|
- **Cached with `mutable_ttl`** and re-fetched from upstream when the TTL expires
|
||||||
- **Automatically refreshed** when the TTL expires — the cached copy is evicted and re-fetched on next request
|
- **Kept stale** when the upstream backend is unreachable — TTL is refreshed automatically so the cached copy remains available until the backend recovers (see below)
|
||||||
|
|
||||||
Built-in defaults per package type:
|
Built-in defaults per package type (no configuration needed):
|
||||||
|
|
||||||
| Package type | Built-in index patterns |
|
| Package type | Built-in mutable patterns |
|
||||||
|---|---|
|
|---|---|
|
||||||
| `alpine` | `APKINDEX\.tar\.gz$` |
|
| `alpine` | `APKINDEX\.tar\.gz$` |
|
||||||
| `rpm` | `repomd\.xml$`, `repodata/` metadata (xml, sqlite, yaml, asc, txt variants), `Packages\.gz$` |
|
| `rpm` | `repomd\.xml$`, `repodata/` metadata (xml, sqlite, yaml, asc, txt variants), `Packages\.gz$` |
|
||||||
| `docker` | Tag manifests (non-digest refs), `/tags/list` |
|
| `docker` | Tag manifests (non-digest refs), `/tags/list` |
|
||||||
| `generic` | *(none)* |
|
| `generic` | *(none)* |
|
||||||
|
|
||||||
Use `index_patterns` to add extra patterns on top of the defaults. Duplicates are ignored automatically.
|
Use `mutable_patterns` to add extra patterns on top of the defaults. Duplicates are ignored automatically.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
remotes:
|
remotes:
|
||||||
@@ -218,60 +209,74 @@ remotes:
|
|||||||
base_url: "https://charts.example.com"
|
base_url: "https://charts.example.com"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- ".*\\.tgz$" # chart archives
|
- ".*\\.tgz$"
|
||||||
index_patterns:
|
mutable_patterns:
|
||||||
- "index\\.yaml$" # Helm repo index — re-fetched on every TTL expiry
|
- "index\\.yaml$" # Helm repo index
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 600 # re-check the index every 10 minutes
|
mutable_ttl: 600 # re-check the index every 10 minutes
|
||||||
|
|
||||||
apt-mirror:
|
apt-mirror:
|
||||||
base_url: "https://apt.example.com"
|
base_url: "https://apt.example.com"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- ".*\\.deb$"
|
- ".*\\.deb$"
|
||||||
index_patterns:
|
mutable_patterns:
|
||||||
- "InRelease$" # signed APT release file
|
- "InRelease$"
|
||||||
- "Release$" # unsigned APT release file
|
- "Release$"
|
||||||
- "Packages\\.gz$" # compressed package list
|
- "Packages\\.gz$"
|
||||||
- "Packages\\.xz$"
|
- "Packages\\.xz$"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 3600 # hourly index refresh
|
mutable_ttl: 3600
|
||||||
|
|
||||||
almalinux-with-extras:
|
|
||||||
base_url: "https://mirror.example.com/almalinux"
|
|
||||||
type: "remote"
|
|
||||||
package: "rpm" # inherits repomd.xml + repodata/* defaults
|
|
||||||
include_patterns:
|
|
||||||
- ".*/x86_64/.*\\.rpm$"
|
|
||||||
- ".*/noarch/.*\\.rpm$"
|
|
||||||
index_patterns:
|
|
||||||
- "comps\\.xml$" # optional group metadata (adds to rpm defaults)
|
|
||||||
cache:
|
|
||||||
file_ttl: 0
|
|
||||||
index_ttl: 7200
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Pattern matching uses `re.search`, so `"index\\.yaml$"` matches `/stable/index.yaml` and `/index.yaml`. Anchor with `^` to restrict to the path root.
|
### Conditional Revalidation (`check_mutable_updates`)
|
||||||
|
|
||||||
|
By default, when a mutable file's TTL expires the cached copy is evicted and the full file is re-downloaded on the next request. Setting `check_mutable_updates: true` on a remote enables a cheaper conditional check first:
|
||||||
|
|
||||||
|
1. On TTL expiry, a `HEAD` request is sent to the upstream with `If-None-Match` / `If-Modified-Since` headers (populated from the original download).
|
||||||
|
2. If the upstream replies **304 Not Modified**, the TTL is refreshed in place — no re-download, no S3 traffic.
|
||||||
|
3. If the upstream replies **200**, the cached copy is evicted and re-downloaded normally.
|
||||||
|
|
||||||
|
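This only applies to files matching user-defined `mutable_patterns`. Files matching package-type built-in patterns (APKINDEX, repomd.xml, Docker manifests) skip the conditional check: on expiry they are evicted and re-fetched, unless the upstream is unreachable (see Stale-on-Upstream-Error).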
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
remotes:
|
||||||
|
github-archive:
|
||||||
|
base_url: "https://github.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "generic"
|
||||||
|
immutable_patterns:
|
||||||
|
- ".*/archive/refs/tags/.*\\.tar\\.gz$"
|
||||||
|
mutable_patterns:
|
||||||
|
- ".*/archive/refs/heads/main\\.tar\\.gz$"
|
||||||
|
check_mutable_updates: true
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 86400
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stale-on-Upstream-Error
|
||||||
|
|
||||||
|
When a mutable file's TTL expires and the upstream backend **cannot be reached** (connection refused, DNS failure, timeout), the cached copy is **kept and its TTL refreshed** rather than evicted. This means:
|
||||||
|
|
||||||
|
- RPM repodata, Alpine indexes, branch archives, and other mutable files remain available during upstream outages.
|
||||||
|
- Clients continue to receive the last-known-good copy without errors.
|
||||||
|
- Once the backend recovers, normal eviction and re-fetching resume the next time the refreshed TTL expires.
|
||||||
|
|
||||||
|
This behaviour is automatic and requires no configuration. Only network-level failures trigger it — HTTP error responses (404, 503, etc.) are treated as the backend being reachable and proceed with normal expiry.
|
||||||
|
|
||||||
### Cache Configuration
|
### Cache Configuration
|
||||||
|
|
||||||
Control how long different file types are cached:
|
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0 # Regular files (0 = cache indefinitely)
|
immutable_ttl: 0 # Immutable files (0 = cache indefinitely, rarely changed)
|
||||||
index_ttl: 300 # Index files like APKINDEX.tar.gz (seconds)
|
mutable_ttl: 3600 # Mutable files — TTL in seconds before re-fetch is attempted
|
||||||
```
|
```
|
||||||
|
|
||||||
**Index Files**: Repository metadata files that change frequently:
|
|
||||||
- Alpine: `APKINDEX.tar.gz`
|
|
||||||
- RPM: `repomd.xml`, `*-primary.xml.gz`, etc.
|
|
||||||
- These are automatically detected and use `index_ttl`
|
|
||||||
|
|
||||||
### Environment Variables
|
### Environment Variables
|
||||||
|
|
||||||
All runtime configuration comes from environment variables:
|
All runtime configuration comes from environment variables:
|
||||||
@@ -351,26 +356,26 @@ data:
|
|||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "GitHub releases and files"
|
description: "GitHub releases and files"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
- "lxc/incus/.*\\.tar\\.gz$"
|
- "lxc/incus/.*\\.tar\\.gz$"
|
||||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 0
|
mutable_ttl: 0
|
||||||
|
|
||||||
hashicorp-releases:
|
hashicorp-releases:
|
||||||
base_url: "https://releases.hashicorp.com"
|
base_url: "https://releases.hashicorp.com"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "HashiCorp product releases"
|
description: "HashiCorp product releases"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
||||||
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
||||||
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 0
|
mutable_ttl: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
### 3. Secret for Environment Variables
|
### 3. Secret for Environment Variables
|
||||||
@@ -778,8 +783,8 @@ remotes:
|
|||||||
username: "your-dockerhub-username"
|
username: "your-dockerhub-username"
|
||||||
password: "your-dockerhub-token" # PAT with read scope
|
password: "your-dockerhub-token" # PAT with read scope
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 300
|
mutable_ttl: 300
|
||||||
```
|
```
|
||||||
|
|
||||||
A pull of `nginx:latest` becomes `/v2/dockerhub/library/nginx/manifests/latest` on the artifact API.
|
A pull of `nginx:latest` becomes `/v2/dockerhub/library/nginx/manifests/latest` on the artifact API.
|
||||||
@@ -804,8 +809,8 @@ remotes:
|
|||||||
username: "your-github-username"
|
username: "your-github-username"
|
||||||
password: "ghp_your_github_pat" # read:packages scope required
|
password: "ghp_your_github_pat" # read:packages scope required
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 300
|
mutable_ttl: 300
|
||||||
```
|
```
|
||||||
|
|
||||||
A pull of `ghcr.io/rancher/rke2-runtime:v1.30.0-rke2r1` becomes `/v2/ghcr/rancher/rke2-runtime/manifests/v1.30.0-rke2r1`.
|
A pull of `ghcr.io/rancher/rke2-runtime:v1.30.0-rke2r1` becomes `/v2/ghcr/rancher/rke2-runtime/manifests/v1.30.0-rke2r1`.
|
||||||
@@ -844,7 +849,7 @@ Each entry needs a matching remote in `remotes.yaml` using the name from the rew
|
|||||||
|
|
||||||
#### Restricting which images are cached
|
#### Restricting which images are cached
|
||||||
|
|
||||||
Use `include_patterns` on the remote to allow only specific images through the proxy. Requests for images not matching any pattern return HTTP 403 to the node.
|
Use `immutable_patterns` on the remote to allow only specific images through the proxy. Requests for images not matching any pattern return HTTP 403 to the node.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
remotes:
|
remotes:
|
||||||
@@ -852,17 +857,17 @@ remotes:
|
|||||||
base_url: "https://registry-1.docker.io"
|
base_url: "https://registry-1.docker.io"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "docker"
|
package: "docker"
|
||||||
include_patterns:
|
immutable_patterns:
|
||||||
- "^library/nginx" # official nginx only
|
- "^library/nginx" # official nginx only
|
||||||
- "^library/redis" # official redis only
|
- "^library/redis" # official redis only
|
||||||
- "^rancher/" # all rancher images
|
- "^rancher/" # all rancher images
|
||||||
- "^grafana/grafana" # specific image
|
- "^grafana/grafana" # specific image
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 300
|
mutable_ttl: 300
|
||||||
```
|
```
|
||||||
|
|
||||||
Omit `include_patterns` to allow all images from that registry.
|
Omit `immutable_patterns` to allow all images from that registry.
|
||||||
|
|
||||||
#### TLS configuration
|
#### TLS configuration
|
||||||
|
|
||||||
|
|||||||
+34
-5
@@ -32,6 +32,10 @@ class ArtifactRequest(BaseModel):
|
|||||||
include_pattern: str
|
include_pattern: str
|
||||||
|
|
||||||
|
|
||||||
|
class UpstreamUnreachable(Exception):
|
||||||
|
"""Raised when the upstream backend cannot be contacted (network or timeout error)."""
|
||||||
|
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -250,8 +254,21 @@ async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
|
|||||||
return {"url": url, "status": "error", "error": str(e)}
|
return {"url": url, "status": "error", "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _upstream_reachable(url: str) -> bool:
|
||||||
|
"""HEAD with a short timeout. Returns False only on network/timeout errors."""
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||||
|
await client.head(url, timeout=10.0)
|
||||||
|
return True
|
||||||
|
except (httpx.NetworkError, httpx.TimeoutException):
|
||||||
|
return False
|
||||||
|
except Exception:
|
||||||
|
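return True # non-network failure (e.g. redirect or protocol error): treat backend as up; 4xx/5xx responses do not raise and already returned True above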
|
||||||
|
|
||||||
|
|
||||||
async def check_upstream_changed(remote_url: str, remote_name: str, path: str) -> bool:
|
async def check_upstream_changed(remote_url: str, remote_name: str, path: str) -> bool:
|
||||||
"""Conditional HEAD against upstream. Returns False only on a definitive 304."""
|
"""Conditional HEAD against upstream. Returns False only on a definitive 304.
|
||||||
|
Raises UpstreamUnreachable if the backend cannot be contacted."""
|
||||||
meta = cache.get_mutable_meta(remote_name, path)
|
meta = cache.get_mutable_meta(remote_name, path)
|
||||||
if not meta:
|
if not meta:
|
||||||
return True
|
return True
|
||||||
@@ -268,25 +285,37 @@ async def check_upstream_changed(remote_url: str, remote_name: str, path: str) -
|
|||||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||||
response = await client.head(remote_url, headers=headers)
|
response = await client.head(remote_url, headers=headers)
|
||||||
return response.status_code != 304
|
return response.status_code != 304
|
||||||
except Exception:
|
except (httpx.NetworkError, httpx.TimeoutException) as exc:
|
||||||
return True
|
raise UpstreamUnreachable(str(exc)) from exc
|
||||||
|
|
||||||
|
|
||||||
async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) -> bool:
|
async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) -> bool:
|
||||||
"""Handle an expired mutable file. Returns True if the cached copy is still valid."""
|
"""Handle an expired mutable file. Returns True if the cached copy is still valid."""
|
||||||
|
mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
|
||||||
|
|
||||||
remote_cfg = config.get_remote_config(remote_name) or {}
|
remote_cfg = config.get_remote_config(remote_name) or {}
|
||||||
check_updates = remote_cfg.get("check_mutable_updates", False)
|
check_updates = remote_cfg.get("check_mutable_updates", False)
|
||||||
user_mutable = check_updates and cache.is_mutable_file(path, config.get_user_mutable_patterns(remote_name))
|
user_mutable = check_updates and cache.is_mutable_file(path, config.get_user_mutable_patterns(remote_name))
|
||||||
|
|
||||||
if user_mutable:
|
if user_mutable:
|
||||||
changed = await check_upstream_changed(remote_url, remote_name, path)
|
try:
|
||||||
|
changed = await check_upstream_changed(remote_url, remote_name, path)
|
||||||
|
except UpstreamUnreachable:
|
||||||
|
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||||
|
logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)")
|
||||||
|
return True
|
||||||
if not changed:
|
if not changed:
|
||||||
mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
|
|
||||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||||
logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)")
|
logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)")
|
||||||
return True
|
return True
|
||||||
logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")
|
logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")
|
||||||
else:
|
else:
|
||||||
|
if not await _upstream_reachable(remote_url):
|
||||||
|
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||||
|
logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)")
|
||||||
|
return True
|
||||||
logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")
|
logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")
|
||||||
|
|
||||||
cache.cleanup_expired_index(storage, remote_name, path)
|
cache.cleanup_expired_index(storage, remote_name, path)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
+40
-6
@@ -248,12 +248,13 @@ class TestDockerProxy:
|
|||||||
deps["cache"].is_index_valid.return_value = False # but TTL expired
|
deps["cache"].is_index_valid.return_value = False # but TTL expired
|
||||||
deps["storage"].download_object.return_value = manifest
|
deps["storage"].download_object.return_value = manifest
|
||||||
|
|
||||||
with patch(
|
with patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=True):
|
||||||
"artifactapi.main.cache_single_artifact",
|
with patch(
|
||||||
new_callable=AsyncMock,
|
"artifactapi.main.cache_single_artifact",
|
||||||
return_value={"status": "cached"},
|
new_callable=AsyncMock,
|
||||||
) as mock_fetch:
|
return_value={"status": "cached"},
|
||||||
response = client.get("/v2/docker-test/library/nginx/manifests/latest")
|
) as mock_fetch:
|
||||||
|
response = client.get("/v2/docker-test/library/nginx/manifests/latest")
|
||||||
|
|
||||||
mock_fetch.assert_called_once()
|
mock_fetch.assert_called_once()
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
@@ -469,6 +470,39 @@ class TestGenericArtifactRoute:
|
|||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
mock_cache.assert_called_once()
|
mock_cache.assert_called_once()
|
||||||
|
|
||||||
|
def test_mutable_backend_unreachable_on_check_updates_keeps_stale(self, client, patched_deps):
|
||||||
|
"""When check_mutable_updates=True and backend is unreachable, stale copy is kept and TTL refreshed."""
|
||||||
|
from artifactapi.main import UpstreamUnreachable
|
||||||
|
|
||||||
|
deps = patched_deps
|
||||||
|
deps["storage"].exists.return_value = True
|
||||||
|
deps["storage"].download_object.return_value = b"stale metadata"
|
||||||
|
deps["cache"].is_mutable_file.return_value = True
|
||||||
|
deps["cache"].is_index_valid.return_value = False
|
||||||
|
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
||||||
|
|
||||||
|
with patch("artifactapi.main.check_upstream_changed", side_effect=UpstreamUnreachable("connection refused")):
|
||||||
|
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
deps["cache"].mark_index_cached.assert_called()
|
||||||
|
deps["storage"].client.delete_object.assert_not_called()
|
||||||
|
|
||||||
|
def test_mutable_backend_unreachable_on_expiry_keeps_stale(self, client, patched_deps):
|
||||||
|
"""When a regular mutable file expires and backend is unreachable, stale copy is kept and TTL refreshed."""
|
||||||
|
deps = patched_deps
|
||||||
|
deps["storage"].exists.return_value = True
|
||||||
|
deps["storage"].download_object.return_value = b"stale APKINDEX"
|
||||||
|
deps["cache"].is_mutable_file.return_value = True
|
||||||
|
deps["cache"].is_index_valid.return_value = False
|
||||||
|
|
||||||
|
with patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=False):
|
||||||
|
response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz")
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
deps["cache"].mark_index_cached.assert_called()
|
||||||
|
deps["storage"].client.delete_object.assert_not_called()
|
||||||
|
|
||||||
def test_mutable_flag_off_skips_conditional_check(self, client, patched_deps):
|
def test_mutable_flag_off_skips_conditional_check(self, client, patched_deps):
|
||||||
"""When check_mutable_updates is not set, expired mutable files are always re-fetched."""
|
"""When check_mutable_updates is not set, expired mutable files are always re-fetched."""
|
||||||
deps = patched_deps
|
deps = patched_deps
|
||||||
|
|||||||
Reference in New Issue
Block a user