Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1cca9fef00 |
@@ -1,15 +0,0 @@
|
||||
.git/
|
||||
.venv/
|
||||
dist/
|
||||
tests/
|
||||
remotes.yaml
|
||||
ca-bundle.pem
|
||||
.env
|
||||
*.log
|
||||
docker-compose.yml
|
||||
.woodpecker/
|
||||
.tox/
|
||||
.ruff_cache/
|
||||
.pytest_cache/
|
||||
.pre-commit-cache/
|
||||
minio_data/
|
||||
+1
-15
@@ -35,6 +35,7 @@ env/
|
||||
|
||||
# Environment variables
|
||||
.env
|
||||
remotes.yaml
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
@@ -42,20 +43,5 @@ env/
|
||||
# uv
|
||||
uv.lock
|
||||
|
||||
# tox
|
||||
.tox/
|
||||
|
||||
# pytest
|
||||
.pytest_cache/
|
||||
|
||||
# pre-commit
|
||||
.pre-commit-cache/
|
||||
|
||||
# ruff
|
||||
.ruff_cache/
|
||||
|
||||
# Docker volumes
|
||||
minio_data/
|
||||
|
||||
# Local configuration overrides
|
||||
ca-bundle.pem
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
repos:
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.15.12
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix, --exit-non-zero-on-fix]
|
||||
- id: ruff-format
|
||||
@@ -1,9 +0,0 @@
|
||||
when:
|
||||
- event: pull_request
|
||||
|
||||
steps:
|
||||
- name: docker-build
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
repo: git.unkin.net/unkin/artifactapi
|
||||
dry_run: true
|
||||
@@ -1,18 +0,0 @@
|
||||
when:
|
||||
- event: tag
|
||||
ref: refs/tags/v*
|
||||
|
||||
steps:
|
||||
- name: docker
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: git.unkin.net
|
||||
repo: git.unkin.net/unkin/artifactapi
|
||||
username: droneci
|
||||
password:
|
||||
from_secret: DRONECI_PASSWORD
|
||||
tags:
|
||||
- ${CI_COMMIT_TAG}
|
||||
- latest
|
||||
build_args:
|
||||
- VERSION=${CI_COMMIT_TAG##v}
|
||||
@@ -1,9 +0,0 @@
|
||||
when:
|
||||
- event: pull_request
|
||||
|
||||
steps:
|
||||
- name: pre-commit
|
||||
image: git.unkin.net/unkin/almalinux9-base:20260308
|
||||
commands:
|
||||
- uvx pre-commit run --all-files
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
when:
|
||||
- event: pull_request
|
||||
|
||||
steps:
|
||||
- name: test
|
||||
image: git.unkin.net/unkin/almalinux9-base:20260308
|
||||
commands:
|
||||
- uvx --python 3.11 --with tox-uv tox
|
||||
+51
-21
@@ -1,23 +1,53 @@
|
||||
FROM git.unkin.net/unkin/almalinux9-base:latest
|
||||
# Use Alpine Linux as base image
|
||||
FROM python:3.11-alpine
|
||||
|
||||
ARG VERSION=0.0.0.dev0
|
||||
|
||||
COPY . /build
|
||||
|
||||
RUN HATCH_VCS_PRETEND_VERSION=${VERSION} \
|
||||
SETUPTOOLS_SCM_PRETEND_VERSION=${VERSION} \
|
||||
uv build --wheel --directory /build && \
|
||||
useradd -m -r -s /bin/sh appuser
|
||||
|
||||
USER appuser
|
||||
RUN uv tool install --from /build/dist/*.whl artifactapi
|
||||
|
||||
USER root
|
||||
RUN rm -rf /build
|
||||
|
||||
EXPOSE 8000
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 CMD curl -f http://localhost:8000/health || exit 1
|
||||
USER appuser
|
||||
ENV PATH="/home/appuser/.local/bin:$PATH"
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
CMD ["artifactapi"]
|
||||
|
||||
# Install system dependencies
|
||||
RUN apk add --no-cache \
|
||||
gcc \
|
||||
musl-dev \
|
||||
libffi-dev \
|
||||
postgresql-dev \
|
||||
curl \
|
||||
wget \
|
||||
tar
|
||||
|
||||
# Install uv
|
||||
ARG PACKAGE_VERSION=0.9.21
|
||||
RUN wget -O /app/uv-x86_64-unknown-linux-musl.tar.gz https://github.com/astral-sh/uv/releases/download/${PACKAGE_VERSION}/uv-x86_64-unknown-linux-musl.tar.gz && \
|
||||
tar xf /app/uv-x86_64-unknown-linux-musl.tar.gz -C /app && \
|
||||
mv /app/uv-x86_64-unknown-linux-musl/uv /usr/local/bin/uv && \
|
||||
rm -rf /app/uv-x86_64-unknown-linux-musl* && \
|
||||
chmod +x /usr/local/bin/uv && \
|
||||
uv --version
|
||||
|
||||
# Copy CA bundle from host
|
||||
COPY ca-bundle.pem /app/ca-bundle.pem
|
||||
RUN chmod 644 /app/ca-bundle.pem
|
||||
|
||||
# Create non-root user first
|
||||
RUN adduser -D -s /bin/sh appuser && \
|
||||
chown -R appuser:appuser /app
|
||||
|
||||
# Copy dependency files and change ownership
|
||||
COPY --chown=appuser:appuser pyproject.toml uv.lock README.md ./
|
||||
|
||||
# Switch to appuser and install Python dependencies
|
||||
USER appuser
|
||||
RUN uv sync --frozen
|
||||
|
||||
# Copy application source
|
||||
COPY --chown=appuser:appuser src/ ./src/
|
||||
COPY --chown=appuser:appuser remotes.yaml ./
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Run the application
|
||||
CMD ["uv", "run", "python", "-m", "src.artifactapi.main"]
|
||||
@@ -1,7 +1,7 @@
|
||||
.PHONY: build install dev clean test lint format pre-commit tox docker-build docker-up docker-down docker-logs docker-rebuild docker-clean docker-restart
|
||||
.PHONY: build install dev clean test lint format docker-build docker-up docker-down docker-logs docker-rebuild docker-clean docker-restart
|
||||
|
||||
build:
|
||||
docker build -t artifactapi:dev .
|
||||
docker build --no-cache -t artifactapi:latest .
|
||||
|
||||
install: build
|
||||
|
||||
@@ -17,13 +17,7 @@ clean:
|
||||
rm -rf *.egg-info/
|
||||
|
||||
test:
|
||||
uvx --python 3.11 --with tox-uv tox
|
||||
|
||||
tox:
|
||||
uvx --python 3.11 --with tox-uv tox
|
||||
|
||||
pre-commit:
|
||||
uvx --python 3.11 pre-commit run --all-files
|
||||
uv run pytest
|
||||
|
||||
lint:
|
||||
uv run ruff check --fix .
|
||||
@@ -32,6 +26,8 @@ format:
|
||||
uv run ruff format .
|
||||
|
||||
run:
|
||||
uv venv --python 3.11 && \
|
||||
source .venv/bin/activate && \
|
||||
uv run python -m src.artifactapi.main
|
||||
|
||||
docker-up:
|
||||
@@ -51,26 +47,3 @@ docker-clean:
|
||||
docker system prune -f
|
||||
|
||||
docker-restart: docker-down docker-up
|
||||
|
||||
# Bump helpers — reads the latest semver tag and creates the next one.
|
||||
# If no tag exists yet, starts from v0.0.0.
|
||||
_LATEST := $(shell git tag --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$$' | head -1)
|
||||
_BASE := $(if $(_LATEST),$(_LATEST),v0.0.0)
|
||||
_MAJ := $(shell echo $(_BASE) | sed 's/^v//' | cut -d. -f1)
|
||||
_MIN := $(shell echo $(_BASE) | sed 's/^v//' | cut -d. -f2)
|
||||
_PAT := $(shell echo $(_BASE) | sed 's/^v//' | cut -d. -f3)
|
||||
|
||||
patch:
|
||||
@NEW=v$(_MAJ).$(_MIN).$(shell expr $(_PAT) + 1); \
|
||||
git tag $$NEW && echo "Tagged $$NEW" && $(MAKE) _tag TAG=$$NEW
|
||||
|
||||
minor:
|
||||
@NEW=v$(_MAJ).$(shell expr $(_MIN) + 1).0; \
|
||||
git tag $$NEW && echo "Tagged $$NEW" && $(MAKE) _tag TAG=$$NEW
|
||||
|
||||
major:
|
||||
@NEW=v$(shell expr $(_MAJ) + 1).0.0; \
|
||||
git tag $$NEW && echo "Tagged $$NEW" && $(MAKE) _tag TAG=$$NEW
|
||||
|
||||
_tag:
|
||||
git push origin $(TAG)
|
||||
|
||||
@@ -6,15 +6,10 @@ A generic FastAPI-based artifact caching system that downloads and stores files
|
||||
|
||||
- **Generic Remote Support**: Works with any HTTP-based file server (GitHub, Gitea, HashiCorp, custom servers)
|
||||
- **Configuration-Based**: YAML configuration for remotes, patterns, and access control
|
||||
- **Direct URL API**: Access cached files via clean URLs like `/api/v1/remote/github/owner/repo/path/file.tar.gz`
|
||||
- **Immutable/Mutable Pattern Model**: Per-remote regex patterns distinguish forever-cached artifacts from TTL-expiring metadata
|
||||
- **Direct URL API**: Access cached files via clean URLs like `/api/github/owner/repo/path/file.tar.gz`
|
||||
- **Pattern Filtering**: Regex-based inclusion patterns for security and organization
|
||||
- **Smart Caching**: Automatic download and cache on first access, serve from cache afterward
|
||||
- **Conditional Revalidation**: Optional `check_mutable_updates` flag — sends `If-None-Match`/`If-Modified-Since` on expiry; skips re-download on 304
|
||||
- **Stale-on-Upstream-Error**: Expired mutable files are kept and their TTL refreshed when the backend cannot be reached, so cached data remains available during upstream outages
|
||||
- **S3 Storage**: MinIO/S3 backend with predictable paths
|
||||
- **Docker Registry Proxy**: Full Docker Registry HTTP API v2 for transparent container image caching
|
||||
- **npm Package Proxy**: Caching proxy for the npm registry with metadata URL rewriting so tarballs also pass through cache
|
||||
- **Helm Chart Repository Proxy**: Caching proxy for Helm chart repositories with `index.yaml` URL rewriting so chart tarballs also pass through cache
|
||||
- **Content-Type Detection**: Automatic MIME type detection for downloads
|
||||
|
||||
## Architecture
|
||||
@@ -76,18 +71,15 @@ The system uses `remotes.yaml` to define remote repositories and access patterns
|
||||
remotes:
|
||||
remote-name:
|
||||
base_url: "https://example.com" # Base URL for the remote
|
||||
type: "remote" # "remote" or "local"
|
||||
package: "generic" # "generic", "alpine", "rpm", or "docker"
|
||||
type: "remote" # Type: "remote" or "local"
|
||||
package: "generic" # Package type: "generic", "alpine", "rpm"
|
||||
description: "Human readable description"
|
||||
immutable_patterns: # Files cached forever (release binaries, versioned tags)
|
||||
include_patterns: # Regex patterns for allowed files
|
||||
- "pattern1"
|
||||
- "pattern2"
|
||||
mutable_patterns: # Files that expire after mutable_ttl (optional)
|
||||
- "pattern3"
|
||||
check_mutable_updates: false # Enable conditional HEAD before re-fetching (optional)
|
||||
cache:
|
||||
immutable_ttl: 0 # TTL for immutable files (0 = indefinitely)
|
||||
mutable_ttl: 3600 # TTL in seconds for mutable files
|
||||
cache: # Cache configuration (optional)
|
||||
file_ttl: 0 # File cache TTL (0 = indefinite)
|
||||
index_ttl: 300 # Index file TTL in seconds
|
||||
```
|
||||
|
||||
### Remote Types
|
||||
@@ -102,30 +94,30 @@ remotes:
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "GitHub releases and files"
|
||||
immutable_patterns:
|
||||
include_patterns:
|
||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||
- "lxc/incus/.*\\.tar\\.gz$"
|
||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
cache:
|
||||
immutable_ttl: 0 # Cache files indefinitely
|
||||
file_ttl: 0 # Cache files indefinitely
|
||||
index_ttl: 0 # No index files for generic remotes
|
||||
|
||||
github-archive:
|
||||
base_url: "https://github.com"
|
||||
hashicorp-releases:
|
||||
base_url: "https://releases.hashicorp.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "GitHub repository archive tarballs"
|
||||
immutable_patterns:
|
||||
- ".*/archive/refs/tags/.*\\.tar\\.gz$" # tag archives never change
|
||||
mutable_patterns:
|
||||
- ".*/archive/refs/heads/main\\.tar\\.gz$" # branch archives can change
|
||||
check_mutable_updates: true # send If-None-Match on expiry; skip re-download on 304
|
||||
description: "HashiCorp product releases"
|
||||
include_patterns:
|
||||
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
||||
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
||||
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 86400 # re-check branch archives after 1 day
|
||||
file_ttl: 0
|
||||
index_ttl: 0
|
||||
```
|
||||
|
||||
#### Package Repository Remotes
|
||||
For Linux package repositories:
|
||||
For Linux package repositories with index files:
|
||||
|
||||
```yaml
|
||||
remotes:
|
||||
@@ -134,25 +126,23 @@ remotes:
|
||||
type: "remote"
|
||||
package: "alpine"
|
||||
description: "Alpine Linux APK package repository"
|
||||
immutable_patterns:
|
||||
- ".*/x86_64/.*\\.apk$" # packages are immutable by content-hash
|
||||
# APKINDEX.tar.gz is a package-type default mutable file — no mutable_patterns needed
|
||||
include_patterns:
|
||||
- ".*/x86_64/.*\\.apk$" # Only x86_64 packages
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 7200 # re-fetch APKINDEX.tar.gz after 2 hours
|
||||
file_ttl: 0 # Cache packages indefinitely
|
||||
index_ttl: 7200 # Cache APKINDEX.tar.gz for 2 hours
|
||||
|
||||
almalinux:
|
||||
base_url: "https://mirror.example.com/almalinux"
|
||||
base_url: "http://mirror.aarnet.edu.au/pub/almalinux"
|
||||
type: "remote"
|
||||
package: "rpm"
|
||||
description: "AlmaLinux RPM package repository"
|
||||
immutable_patterns:
|
||||
include_patterns:
|
||||
- ".*/x86_64/.*\\.rpm$"
|
||||
- ".*/noarch/.*\\.rpm$"
|
||||
# repomd.xml and repodata/* are package-type defaults
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 7200
|
||||
file_ttl: 0
|
||||
index_ttl: 7200 # Cache metadata files for 2 hours
|
||||
```
|
||||
|
||||
#### Local Repositories
|
||||
@@ -165,120 +155,49 @@ remotes:
|
||||
package: "generic"
|
||||
description: "Local generic file repository"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 0
|
||||
file_ttl: 0
|
||||
index_ttl: 0
|
||||
```
|
||||
|
||||
### Immutable Patterns
|
||||
### Include Patterns
|
||||
|
||||
`immutable_patterns` are regular expressions that control which files can be accessed. Patterns use Python `re.search`, so they match anywhere in the path unless anchored with `^` or `$`. Only files matching at least one pattern are served; all others return HTTP 403.
|
||||
|
||||
Matched files are cached with `immutable_ttl` (default 0 = forever). Use these for versioned release artifacts that never change once published.
|
||||
Include patterns are regular expressions that control which files can be accessed:
|
||||
|
||||
```yaml
|
||||
immutable_patterns:
|
||||
- "^gruntwork-io/terragrunt/releases/download/.*/terragrunt_linux_amd64$"
|
||||
include_patterns:
|
||||
# Specific project patterns
|
||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||
|
||||
# File extension patterns
|
||||
- ".*\\.tar\\.gz$"
|
||||
- ".*/x86_64/.*\\.rpm$"
|
||||
- ".*/noarch/.*\\.rpm$"
|
||||
- ".*/repodata/.*$"
|
||||
- ".*\\.zip$"
|
||||
- ".*\\.rpm$"
|
||||
|
||||
# Architecture-specific patterns
|
||||
- ".*/x86_64/.*"
|
||||
- ".*/linux-amd64/.*"
|
||||
|
||||
# Version-specific patterns
|
||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
```
|
||||
|
||||
**Security note**: Omitting `immutable_patterns` entirely allows all files from that remote.
|
||||
|
||||
### Mutable Patterns
|
||||
|
||||
`mutable_patterns` identify files that change over time (index files, branch archives, metadata). Mutable files:
|
||||
- **Always served** regardless of `immutable_patterns`
|
||||
- **Cached with `mutable_ttl`** and re-fetched from upstream when the TTL expires
|
||||
- **Kept stale** when the upstream backend is unreachable — TTL is refreshed automatically so the cached copy remains available until the backend recovers (see below)
|
||||
|
||||
Built-in defaults per package type (no configuration needed):
|
||||
|
||||
| Package type | Built-in mutable patterns |
|
||||
|---|---|
|
||||
| `alpine` | `APKINDEX\.tar\.gz$` |
|
||||
| `rpm` | `repomd\.xml$`, `repodata/` metadata (xml, sqlite, yaml, asc, txt variants), `Packages\.gz$` |
|
||||
| `docker` | Tag manifests (non-digest refs), `/tags/list` |
|
||||
| `generic` | *(none)* |
|
||||
|
||||
Use `mutable_patterns` to add extra patterns on top of the defaults. Duplicates are ignored automatically.
|
||||
|
||||
```yaml
|
||||
remotes:
|
||||
helm-charts:
|
||||
base_url: "https://charts.example.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
immutable_patterns:
|
||||
- ".*\\.tgz$"
|
||||
mutable_patterns:
|
||||
- "index\\.yaml$" # Helm repo index
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600 # re-check the index every 10 minutes
|
||||
|
||||
apt-mirror:
|
||||
base_url: "https://apt.example.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
immutable_patterns:
|
||||
- ".*\\.deb$"
|
||||
mutable_patterns:
|
||||
- "InRelease$"
|
||||
- "Release$"
|
||||
- "Packages\\.gz$"
|
||||
- "Packages\\.xz$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 3600
|
||||
```
|
||||
|
||||
### Conditional Revalidation (`check_mutable_updates`)
|
||||
|
||||
By default, when a mutable file's TTL expires the cached copy is evicted and the full file is re-downloaded on the next request. Setting `check_mutable_updates: true` on a remote enables a cheaper conditional check first:
|
||||
|
||||
1. On TTL expiry, a `HEAD` request is sent to the upstream with `If-None-Match` / `If-Modified-Since` headers (populated from the original download).
|
||||
2. If the upstream replies **304 Not Modified**, the TTL is refreshed in place — no re-download, no S3 traffic.
|
||||
3. If the upstream replies **200**, the cached copy is evicted and re-downloaded normally.
|
||||
|
||||
This only applies to user-defined `mutable_patterns`. Package-type built-in patterns (APKINDEX, repomd.xml, Docker manifests) are always re-fetched unconditionally.
|
||||
|
||||
```yaml
|
||||
remotes:
|
||||
github-archive:
|
||||
base_url: "https://github.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
immutable_patterns:
|
||||
- ".*/archive/refs/tags/.*\\.tar\\.gz$"
|
||||
mutable_patterns:
|
||||
- ".*/archive/refs/heads/main\\.tar\\.gz$"
|
||||
check_mutable_updates: true
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 86400
|
||||
```
|
||||
|
||||
### Stale-on-Upstream-Error
|
||||
|
||||
When a mutable file's TTL expires and the upstream backend **cannot be reached** (connection refused, DNS failure, timeout), the cached copy is **kept and its TTL refreshed** rather than evicted. This means:
|
||||
|
||||
- RPM repodata, Alpine indexes, branch archives, and other mutable files remain available during upstream outages.
|
||||
- Clients continue to receive the last-known-good copy without errors.
|
||||
- Once the backend recovers and the refreshed TTL next expires, normal eviction resumes.
|
||||
|
||||
This behaviour is automatic and requires no configuration. Only network-level failures trigger it — HTTP error responses (404, 503, etc.) are treated as the backend being reachable and proceed with normal expiry.
|
||||
**Security Note**: Only files matching at least one include pattern are accessible. Files not matching any pattern return HTTP 403.
|
||||
|
||||
### Cache Configuration
|
||||
|
||||
Control how long different file types are cached:
|
||||
|
||||
```yaml
|
||||
cache:
|
||||
immutable_ttl: 0 # Immutable files (0 = cache indefinitely, rarely changed)
|
||||
mutable_ttl: 3600 # Mutable files — TTL in seconds before re-fetch is attempted
|
||||
file_ttl: 0 # Regular files (0 = cache indefinitely)
|
||||
index_ttl: 300 # Index files like APKINDEX.tar.gz (seconds)
|
||||
```
|
||||
|
||||
**Index Files**: Repository metadata files that change frequently:
|
||||
- Alpine: `APKINDEX.tar.gz`
|
||||
- RPM: `repomd.xml`, `*-primary.xml.gz`, etc.
|
||||
- These are automatically detected and use `index_ttl`
|
||||
|
||||
### Environment Variables
|
||||
|
||||
All runtime configuration comes from environment variables:
|
||||
@@ -358,26 +277,26 @@ data:
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "GitHub releases and files"
|
||||
immutable_patterns:
|
||||
include_patterns:
|
||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||
- "lxc/incus/.*\\.tar\\.gz$"
|
||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 0
|
||||
file_ttl: 0
|
||||
index_ttl: 0
|
||||
|
||||
hashicorp-releases:
|
||||
base_url: "https://releases.hashicorp.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "HashiCorp product releases"
|
||||
immutable_patterns:
|
||||
include_patterns:
|
||||
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
||||
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
||||
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 0
|
||||
file_ttl: 0
|
||||
index_ttl: 0
|
||||
```
|
||||
|
||||
### 3. Secret for Environment Variables
|
||||
@@ -743,406 +662,3 @@ curl "http://localhost:8000/api/github/gruntwork-io/terragrunt/releases/download
|
||||
- Configure backup strategies for persistent volumes
|
||||
- Set up proper TLS certificates for ingress
|
||||
- Consider using StatefulSets for databases with persistent storage
|
||||
|
||||
## Docker Image Rewriting with RKE2
|
||||
|
||||
RKE2 can route container image pulls through registry mirrors using `/etc/rancher/rke2/registries.yaml`. The artifact API implements the Docker Registry HTTP API v2 at `/v2/`, so it acts as a transparent caching mirror for any upstream registry.
|
||||
|
||||
### How it works
|
||||
|
||||
1. A pod requests `docker.io/library/nginx:latest`
|
||||
2. RKE2 intercepts the pull and rewrites the image path using the `rewrite` rules
|
||||
3. The rewritten request hits the artifact API (`/v2/dockerhub/library/nginx/manifests/latest`)
|
||||
4. On first access the API fetches the manifest and layers from Docker Hub and caches them in S3
|
||||
5. Subsequent pulls are served directly from cache, with no upstream traffic
|
||||
|
||||
### registries.yaml
|
||||
|
||||
Place this file on every RKE2 node at `/etc/rancher/rke2/registries.yaml`. The `rewrite` field maps the original image path (as the upstream registry sees it) to the path the artifact API expects under `/v2/{remote_name}/...`.
|
||||
|
||||
#### Docker Hub
|
||||
|
||||
Docker Hub resolves unqualified image names like `nginx` as `library/nginx`. The rewrite prepends the remote name so the request lands on the correct remote.
|
||||
|
||||
```yaml
|
||||
# /etc/rancher/rke2/registries.yaml
|
||||
mirrors:
|
||||
docker.io:
|
||||
endpoint:
|
||||
- "https://artifacts.example.com"
|
||||
rewrite:
|
||||
"^(.*)$": "dockerhub/$1"
|
||||
```
|
||||
|
||||
Corresponding `remotes.yaml` entry:
|
||||
|
||||
```yaml
|
||||
remotes:
|
||||
dockerhub:
|
||||
base_url: "https://registry-1.docker.io"
|
||||
type: "remote"
|
||||
package: "docker"
|
||||
username: "your-dockerhub-username"
|
||||
password: "your-dockerhub-token" # PAT with read scope
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 300
|
||||
```
|
||||
|
||||
A pull of `nginx:latest` becomes `/v2/dockerhub/library/nginx/manifests/latest` on the artifact API.
|
||||
|
||||
#### GitHub Container Registry (ghcr.io)
|
||||
|
||||
```yaml
|
||||
mirrors:
|
||||
ghcr.io:
|
||||
endpoint:
|
||||
- "https://artifacts.example.com"
|
||||
rewrite:
|
||||
"^(.*)$": "ghcr/$1"
|
||||
```
|
||||
|
||||
```yaml
|
||||
remotes:
|
||||
ghcr:
|
||||
base_url: "https://ghcr.io"
|
||||
type: "remote"
|
||||
package: "docker"
|
||||
username: "your-github-username"
|
||||
password: "ghp_your_github_pat" # read:packages scope required
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 300
|
||||
```
|
||||
|
||||
A pull of `ghcr.io/rancher/rke2-runtime:v1.30.0-rke2r1` becomes `/v2/ghcr/rancher/rke2-runtime/manifests/v1.30.0-rke2r1`.
|
||||
|
||||
#### Multiple registries
|
||||
|
||||
```yaml
|
||||
# /etc/rancher/rke2/registries.yaml
|
||||
mirrors:
|
||||
docker.io:
|
||||
endpoint:
|
||||
- "https://artifacts.example.com"
|
||||
rewrite:
|
||||
"^(.*)$": "dockerhub/$1"
|
||||
|
||||
ghcr.io:
|
||||
endpoint:
|
||||
- "https://artifacts.example.com"
|
||||
rewrite:
|
||||
"^(.*)$": "ghcr/$1"
|
||||
|
||||
registry.k8s.io:
|
||||
endpoint:
|
||||
- "https://artifacts.example.com"
|
||||
rewrite:
|
||||
"^(.*)$": "k8s-registry/$1"
|
||||
|
||||
quay.io:
|
||||
endpoint:
|
||||
- "https://artifacts.example.com"
|
||||
rewrite:
|
||||
"^(.*)$": "quay/$1"
|
||||
```
|
||||
|
||||
Each entry needs a matching remote in `remotes.yaml` using the name from the rewrite target (e.g. `k8s-registry`, `quay`).
|
||||
|
||||
#### Restricting which images are cached
|
||||
|
||||
Use `immutable_patterns` on the remote to allow only specific images through the proxy. Requests for images not matching any pattern return HTTP 403 to the node.
|
||||
|
||||
```yaml
|
||||
remotes:
|
||||
dockerhub:
|
||||
base_url: "https://registry-1.docker.io"
|
||||
type: "remote"
|
||||
package: "docker"
|
||||
immutable_patterns:
|
||||
- "^library/nginx" # official nginx only
|
||||
- "^library/redis" # official redis only
|
||||
- "^rancher/" # all rancher images
|
||||
- "^grafana/grafana" # specific image
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 300
|
||||
```
|
||||
|
||||
Omit `immutable_patterns` to allow all images from that registry.
|
||||
|
||||
#### TLS configuration
|
||||
|
||||
If the artifact API uses a private CA certificate, tell containerd about it in `registries.yaml`:
|
||||
|
||||
```yaml
|
||||
mirrors:
|
||||
docker.io:
|
||||
endpoint:
|
||||
- "https://artifacts.example.com"
|
||||
rewrite:
|
||||
"^(.*)$": "dockerhub/$1"
|
||||
|
||||
configs:
|
||||
"artifacts.example.com":
|
||||
tls:
|
||||
ca_file: /etc/ssl/certs/internal-ca.crt
|
||||
```
|
||||
|
||||
### Applying the configuration
|
||||
|
||||
```bash
|
||||
# Write registries.yaml on each node (server and agent)
|
||||
sudo mkdir -p /etc/rancher/rke2
|
||||
sudo tee /etc/rancher/rke2/registries.yaml <<'EOF'
|
||||
mirrors:
|
||||
docker.io:
|
||||
endpoint:
|
||||
- "https://artifacts.example.com"
|
||||
rewrite:
|
||||
"^(.*)$": "dockerhub/$1"
|
||||
ghcr.io:
|
||||
endpoint:
|
||||
- "https://artifacts.example.com"
|
||||
rewrite:
|
||||
"^(.*)$": "ghcr/$1"
|
||||
EOF
|
||||
|
||||
# Restart the RKE2 service (server nodes)
|
||||
sudo systemctl restart rke2-server
|
||||
|
||||
# Or on agent nodes
|
||||
sudo systemctl restart rke2-agent
|
||||
|
||||
# Confirm containerd picked up the mirror config
|
||||
sudo /var/lib/rancher/rke2/bin/crictl info | jq '.config.registry.mirrors'
|
||||
```
|
||||
|
||||
### Verifying pulls go through the cache
|
||||
|
||||
```bash
|
||||
# Pull an image on a node
|
||||
sudo /var/lib/rancher/rke2/bin/crictl pull nginx:latest
|
||||
|
||||
# Check the artifact API received the request
|
||||
kubectl logs deployment/artifactapi -n artifact-storage | grep "nginx"
|
||||
# Expect: Cache MISS on first pull, Cache HIT on subsequent pulls
|
||||
|
||||
# Query the manifest endpoint directly — 200 means it's cached
|
||||
curl -I https://artifacts.example.com/v2/dockerhub/library/nginx/manifests/latest
|
||||
|
||||
# Check what's stored in the cache
|
||||
curl https://artifacts.example.com/ | jq '.remotes'
|
||||
```
|
||||
|
||||
## Python Package Proxy with uv
|
||||
|
||||
The `pypi` package type turns the artifact API into a caching PyPI proxy. Simple index pages (`/simple/{package}/`) are mutable and expire after `mutable_ttl`; package files (wheels, sdists, metadata) are immutable and cached forever. URLs in the simple index HTML are rewritten on the fly to point back through the proxy, so both the index lookup and the file download are served from cache.
|
||||
|
||||
### remotes.yaml
|
||||
|
||||
```yaml
|
||||
remotes:
|
||||
pypi:
|
||||
base_url: "https://pypi.org"
|
||||
type: "remote"
|
||||
package: "pypi"
|
||||
pypi_files_url: "https://files.pythonhosted.org" # host to rewrite in index HTML
|
||||
pypi_files_remote: "pypi-files" # our proxy remote to replace it with
|
||||
check_mutable_updates: true
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600 # re-check simple indexes after 10 minutes
|
||||
|
||||
pypi-files:
|
||||
base_url: "https://files.pythonhosted.org"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
immutable_patterns:
|
||||
- "packages/.*\\.whl$"
|
||||
- "packages/.*\\.whl\\.metadata$"
|
||||
- "packages/.*\\.tar\\.gz$"
|
||||
- "packages/.*\\.zip$"
|
||||
- "packages/.*\\.egg$"
|
||||
cache:
|
||||
immutable_ttl: 0 # package files are content-addressed — cache forever
|
||||
|
||||
# Self-hosted Gitea PyPI registry (index and files share the same base URL)
|
||||
pypi-gitea:
|
||||
base_url: "https://gitea.example.com/api/packages/myorg/pypi"
|
||||
type: "remote"
|
||||
package: "pypi"
|
||||
# username: "your-gitea-username"
|
||||
# password: "your-personal-access-token" # needs package:read scope
|
||||
pypi_files_url: "https://gitea.example.com/api/packages/myorg/pypi"
|
||||
pypi_files_remote: "pypi-gitea" # point back to itself — Gitea serves both index and files
|
||||
check_mutable_updates: true
|
||||
immutable_patterns:
|
||||
- "files/.*\\.whl$"
|
||||
- "files/.*\\.whl\\.metadata$"
|
||||
- "files/.*\\.tar\\.gz$"
|
||||
- "files/.*\\.zip$"
|
||||
- "files/.*\\.egg$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600
|
||||
```
|
||||
|
||||
### Configuring uv system- or user-wide
|
||||
|
||||
uv reads `uv.toml` from two locations outside any project, applied in order from broadest to narrowest scope:
|
||||
|
||||
| Scope | Path (Linux/macOS) |
|
||||
|---|---|
|
||||
| System | `/etc/uv/uv.toml` |
|
||||
| User | `~/.config/uv/uv.toml` |
|
||||
|
||||
Use these files to route **all** package installs on a machine through the proxy without touching individual projects or their `pyproject.toml`.
|
||||
|
||||
**`/etc/uv/uv.toml`** — applies to every user on the host:
|
||||
|
||||
```toml
|
||||
# Replace the default PyPI index with the caching proxy
|
||||
[[index]]
|
||||
url = "https://artifacts.example.com/api/v1/remote/pypi/simple"
|
||||
default = true
|
||||
|
||||
# Optionally add a private index (searched alongside the default)
|
||||
[[index]]
|
||||
url = "https://artifacts.example.com/api/v1/remote/pypi-gitea/simple"
|
||||
name = "gitea"
|
||||
```
|
||||
|
||||
**`~/.config/uv/uv.toml`** — same syntax, single-user scope:
|
||||
|
||||
```toml
|
||||
[[index]]
|
||||
url = "https://artifacts.example.com/api/v1/remote/pypi/simple"
|
||||
default = true
|
||||
```
|
||||
|
||||
Setting `default = true` replaces uv's built-in PyPI index. The first install of a package fetches it from upstream and populates the cache; every subsequent install — from any machine or fresh environment pointing at the same proxy — is served directly from S3.
|
||||
|
||||
### How the rewriting works
|
||||
|
||||
When uv requests the simple index for a package, the proxy:
|
||||
|
||||
1. Fetches `https://pypi.org/simple/{package}/` (or returns a valid cached copy within `mutable_ttl`)
|
||||
2. Rewrites every `https://files.pythonhosted.org/...` href to `https://artifacts.example.com/api/v1/remote/pypi-files/...`
|
||||
3. Returns the rewritten HTML to uv
|
||||
|
||||
uv then downloads wheels and `.whl.metadata` files via the rewritten URLs, which also pass through the proxy and are cached as immutable artifacts.
|
||||
|
||||
For self-hosted registries like Gitea, both the index and file downloads share the same base URL. Setting `pypi_files_url` and `pypi_files_remote` to the same remote causes file links to be rewritten back through the same proxy entry.
|
||||
|
||||
## npm Package Proxy
|
||||
|
||||
The `npm` package type turns the artifact API into a caching npm registry proxy. Since the npm registry serves both metadata and tarballs from the same host, a single remote handles everything. Package metadata (e.g. `GET /express`) is mutable and expires after `mutable_ttl`; tarballs (`.tgz`) are immutable and cached forever. `dist.tarball` URLs in metadata JSON are rewritten on the fly to point back through the same remote, so both the metadata lookup and the tarball download are served from cache.
|
||||
|
||||
### remotes.yaml
|
||||
|
||||
```yaml
|
||||
remotes:
|
||||
npm:
|
||||
base_url: "https://registry.npmjs.org"
|
||||
type: "remote"
|
||||
package: "npm"
|
||||
npm_files_url: "https://registry.npmjs.org" # URL prefix to rewrite in metadata JSON
|
||||
npm_files_remote: "npm" # rewrite back to this same remote
|
||||
check_mutable_updates: true
|
||||
immutable_patterns:
|
||||
- "\\.tgz$" # versioned tarballs are content-addressed — cache forever
|
||||
mutable_patterns:
|
||||
- "^(?!.*\\.tgz$).*" # everything else (package metadata) expires after mutable_ttl
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600 # re-check package metadata after 10 minutes
|
||||
```
|
||||
|
||||
### Configuring npm / yarn / pnpm
|
||||
|
||||
**npm** — per-project `.npmrc` or `~/.npmrc`:
|
||||
|
||||
```ini
|
||||
registry=https://artifacts.example.com/api/v1/remote/npm/
|
||||
```
|
||||
|
||||
**yarn** — `~/.yarnrc.yml`:
|
||||
|
||||
```yaml
|
||||
npmRegistryServer: "https://artifacts.example.com/api/v1/remote/npm/"
|
||||
```
|
||||
|
||||
**pnpm** — `.npmrc`:
|
||||
|
||||
```ini
|
||||
registry=https://artifacts.example.com/api/v1/remote/npm/
|
||||
```
|
||||
|
||||
### How the rewriting works
|
||||
|
||||
When a client requests package metadata, the proxy:
|
||||
|
||||
1. Fetches `https://registry.npmjs.org/{package}` (or returns a cached copy within `mutable_ttl`)
|
||||
2. Rewrites every `https://registry.npmjs.org/...` tarball URL to `https://artifacts.example.com/api/v1/remote/npm/...`
|
||||
3. Returns the rewritten JSON to the client
|
||||
|
||||
The client then downloads the tarball via the rewritten URL, which hits the same `npm` remote and is cached as an immutable artifact. Subsequent installs of the same package version are served entirely from S3.
|
||||
|
||||
### Mutable vs immutable paths
|
||||
|
||||
| Path pattern | Type | Example |
|
||||
|---|---|---|
|
||||
| `/{package}` | Mutable (TTL) | `/express` |
|
||||
| `/@{scope}/{package}` | Mutable (TTL) | `/@babel/core` |
|
||||
| `/-/all` | Mutable (TTL) | `/-/all` |
|
||||
| `/{package}/-/{package}-{version}.tgz` | Immutable (forever) | `/express/-/express-4.18.2.tgz` |
|
||||
| `/@{scope}/{pkg}/-/{pkg}-{ver}.tgz` | Immutable (forever) | `/@babel/core/-/core-7.21.0.tgz` |
|
||||
|
||||
## Helm Chart Repository Proxy
|
||||
|
||||
The `helm` package type turns the artifact API into a caching Helm chart repository proxy. A single remote handles both the mutable `index.yaml` and the immutable versioned chart tarballs, since they are served from the same upstream host. Chart URLs inside `index.yaml` are rewritten on the fly to point back through the same remote, so both the index lookup and the chart download are served from cache.
|
||||
|
||||
### remotes.yaml
|
||||
|
||||
```yaml
|
||||
remotes:
|
||||
hashicorp-helm:
|
||||
base_url: "https://helm.releases.hashicorp.com"
|
||||
type: "remote"
|
||||
package: "helm"
|
||||
check_mutable_updates: true
|
||||
immutable_patterns:
|
||||
- "\\.tgz$" # chart tarballs — cache forever
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 3600 # index.yaml refreshed after 1 hour
|
||||
```
|
||||
|
||||
### Configuring Helm
|
||||
|
||||
Point Helm at the proxy with `helm repo add`:
|
||||
|
||||
```bash
|
||||
helm repo add hashicorp https://artifacts.example.com/api/v1/remote/hashicorp-helm
|
||||
helm repo update
|
||||
helm search repo hashicorp/vault
|
||||
helm install vault hashicorp/vault
|
||||
```
|
||||
|
||||
### How the rewriting works
|
||||
|
||||
When a client requests `index.yaml`, the proxy:
|
||||
|
||||
1. Fetches `https://helm.releases.hashicorp.com/index.yaml` (or returns a cached copy within `mutable_ttl`)
|
||||
2. Rewrites every `https://helm.releases.hashicorp.com/...` chart URL to `https://artifacts.example.com/api/v1/remote/hashicorp-helm/...`
|
||||
3. Returns the rewritten YAML to the client
|
||||
|
||||
The client then downloads chart tarballs via the rewritten URLs, which hit the same `hashicorp-helm` remote and are cached as immutable artifacts. Subsequent installs of the same chart version are served entirely from S3.
|
||||
|
||||
### Mutable vs immutable paths
|
||||
|
||||
| Path | Type | Example |
|
||||
|---|---|---|
|
||||
| `index.yaml` | Mutable (TTL) | `index.yaml` |
|
||||
| `{chart}-{version}.tgz` | Immutable (forever) | `vault-0.29.1.tgz` |
|
||||
@@ -1,137 +0,0 @@
|
||||
# ArtifactAPI Specification
|
||||
|
||||
## Repository model
|
||||
|
||||
Every repository entry in `remotes.yaml` has two orthogonal fields:
|
||||
|
||||
| field | values | meaning |
|
||||
|---|---|---|
|
||||
| `type` | `local`, `remote`, `virtual` | repository kind — how the repo is served |
|
||||
| `package` | `docker`, `rpm`, `alpine`, `generic` | package format — what protocol and caching rules to apply |
|
||||
|
||||
**type**
|
||||
|
||||
- `local` — files are uploaded directly to the API and stored in S3; no upstream.
|
||||
- `remote` — proxies and caches content from an upstream URL (`base_url`).
|
||||
- `virtual` — aggregates multiple repositories (not yet implemented).
|
||||
|
||||
**package**
|
||||
|
||||
- `docker` — upstream speaks the OCI Distribution API (Bearer auth, manifest/blob paths).
|
||||
- `rpm` — upstream is an RPM repository; repodata files are index files.
|
||||
- `alpine` — upstream is an Alpine APK repository; `APKINDEX.tar.gz` is an index file.
|
||||
- `generic` — plain HTTP file download; no format-specific logic.
|
||||
|
||||
---
|
||||
|
||||
## Caching
|
||||
|
||||
Two cache classes determine retention:
|
||||
|
||||
| class | stored | TTL |
|
||||
|---|---|---|
|
||||
| **file** | S3 object, no Redis entry | `file_ttl` — `0` means indefinite |
|
||||
| **index** | S3 object + Redis TTL key | `index_ttl` — when the Redis key expires the S3 object is deleted and re-fetched |
|
||||
|
||||
Index files are mutable metadata that must expire. File-class objects are treated as immutable and cached indefinitely unless `file_ttl` is set to a non-zero value.
|
||||
|
||||
---
|
||||
|
||||
## Docker package rules
|
||||
|
||||
### URL construction
|
||||
|
||||
Remote URLs are prefixed with `/v2/` for `package: docker` remotes:
|
||||
|
||||
```
|
||||
{base_url}/v2/{path}
|
||||
```
|
||||
|
||||
e.g. `library/nginx/manifests/latest` → `https://registry-1.docker.io/v2/library/nginx/manifests/latest`
|
||||
|
||||
### Authentication
|
||||
|
||||
Docker registries use Bearer token challenges. On a `401 Unauthorized` response, the API:
|
||||
|
||||
1. Parses the `WWW-Authenticate: Bearer` header for `realm`, `service`, and `scope`.
|
||||
2. Fetches a token from the auth realm, supplying `username`/`password` from the remote config if present.
|
||||
3. Retries the request with `Authorization: Bearer <token>`.
|
||||
|
||||
Tokens are cached in-memory keyed by `(realm, service, scope, username)` and expire 30 seconds before their stated `expires_in`.
|
||||
|
||||
### Cache classification
|
||||
|
||||
| path pattern | mutable | class | TTL source |
|
||||
|---|---|---|---|
|
||||
| `/manifests/<tag>` | yes | index | `index_ttl` |
|
||||
| `/tags/list` | yes | index | `index_ttl` |
|
||||
| `/manifests/sha256:<digest>` | no | file | `file_ttl` |
|
||||
| `/blobs/sha256:<digest>` | no | file | `file_ttl` |
|
||||
|
||||
Tag-based manifests and tag lists are mutable and cached as index. Digest-pinned manifests and blobs are content-addressed and cached indefinitely as files.
|
||||
|
||||
### Blob deduplication
|
||||
|
||||
Blobs are stored under a digest-keyed path shared across all images on the same remote:
|
||||
|
||||
```
|
||||
{remote_name}/blobs/sha256/{digest}
|
||||
```
|
||||
|
||||
The same layer pulled by different images is stored once.
|
||||
|
||||
### Accept headers
|
||||
|
||||
| path | `Accept` header sent upstream |
|
||||
|---|---|
|
||||
| `/manifests/…` | `application/vnd.docker.distribution.manifest.v2+json`, `application/vnd.oci.image.manifest.v1+json`, `application/vnd.oci.image.index.v1+json`, `application/vnd.docker.distribution.manifest.list.v2+json` |
|
||||
| `/blobs/…` | `application/octet-stream` |
|
||||
|
||||
---
|
||||
|
||||
## OCI Distribution API endpoint
|
||||
|
||||
The API exposes a native Docker registry interface so clients can use `docker pull` directly:
|
||||
|
||||
```
|
||||
GET /v2/ — version ping
|
||||
GET /v2/{remote}/{image}/manifests/{ref} — fetch manifest
|
||||
HEAD /v2/{remote}/{image}/manifests/{ref} — manifest metadata
|
||||
GET /v2/{remote}/{image}/blobs/{digest} — fetch blob
|
||||
HEAD /v2/{remote}/{image}/blobs/{digest} — blob metadata
|
||||
```
|
||||
|
||||
Responses include `Docker-Distribution-Api-Version`, `Docker-Content-Digest`, and the correct OCI `Content-Type` (detected from the manifest `mediaType` field).
|
||||
|
||||
Only remotes with `package: docker` are accessible via this endpoint. All other remotes return `400`.
|
||||
|
||||
---
|
||||
|
||||
## include_patterns
|
||||
|
||||
`include_patterns` is a list of Python regexes applied to every request before any upstream fetch or cache lookup.
|
||||
|
||||
**Generic remotes (`/api/v1/remote/…`):**
|
||||
- Patterns match against the file path and the full path.
|
||||
- Index files (mutable metadata) bypass pattern checks and are always allowed.
|
||||
|
||||
**Docker remotes (`/v2/…`):**
|
||||
- Patterns match against the image name (first two path segments, e.g. `library/nginx`) and the full path.
|
||||
- The index-file exemption does **not** apply — patterns restrict whole images, including their manifests and tag lists.
|
||||
- No patterns configured → all images allowed.
|
||||
|
||||
Returns `403` when a request is blocked.
|
||||
|
||||
---
|
||||
|
||||
## Versioning
|
||||
|
||||
The package version is derived from git tags via `hatch-vcs`. Tags follow the format `v{MAJOR}.{MINOR}.{PATCH}`.
|
||||
|
||||
Docker images are built with the version injected at build time:
|
||||
|
||||
```
|
||||
SETUPTOOLS_SCM_PRETEND_VERSION=<version> uv sync --frozen
|
||||
```
|
||||
|
||||
The `Makefile` provides `patch`, `minor`, and `major` targets that tag the current commit and rebuild the container image.
|
||||
+1
-6
@@ -5,13 +5,9 @@ services:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
- VERSION=2.2.2.dev0
|
||||
no_cache: true
|
||||
ports:
|
||||
- "8000:8000"
|
||||
volumes:
|
||||
- ./remotes.yaml:/app/remotes.yaml:ro,z
|
||||
- ./ca-bundle.pem:/app/ca-bundle.pem:ro,z
|
||||
environment:
|
||||
- CONFIG_PATH=/app/remotes.yaml
|
||||
- DBHOST=postgres
|
||||
@@ -25,7 +21,6 @@ services:
|
||||
- MINIO_SECRET_KEY=minioadmin
|
||||
- MINIO_BUCKET=artifacts
|
||||
- MINIO_SECURE=false
|
||||
- REQUESTS_CA_BUNDLE=/app/ca-bundle.pem
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
|
||||
+3
-19
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "artifactapi"
|
||||
dynamic = ["version"]
|
||||
version = "2.0.0"
|
||||
description = "Generic artifact caching system with support for various package managers"
|
||||
|
||||
dependencies = [
|
||||
@@ -23,12 +23,9 @@ license = {text = "MIT"}
|
||||
artifactapi = "artifactapi.main:main"
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling", "hatch-vcs"]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.version]
|
||||
source = "vcs"
|
||||
|
||||
[tool.hatch.metadata]
|
||||
allow-direct-references = true
|
||||
|
||||
@@ -42,18 +39,5 @@ dev = [
|
||||
"black>=23.9.0",
|
||||
"isort>=5.12.0",
|
||||
"mypy>=1.6.0",
|
||||
"ruff>=0.4.0",
|
||||
"tox>=4.0.0",
|
||||
"pre-commit>=3.0.0",
|
||||
"ruff>=0.1.0",
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
asyncio_mode = "auto"
|
||||
testpaths = ["tests"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 140
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "I", "UP"]
|
||||
ignore = ["E501"]
|
||||
|
||||
-282
@@ -1,282 +0,0 @@
|
||||
# Example remotes configuration — copy and adapt for your environment.
|
||||
#
|
||||
# immutable_patterns: artifacts cached forever (e.g. release binaries, versioned tags).
|
||||
# mutable_patterns: artifacts that expire after cache.mutable_ttl seconds and are
|
||||
# re-fetched from upstream on next request (e.g. index files,
|
||||
# branch archives). Defaults to the package-type built-ins when
|
||||
# not set (APKINDEX, repomd.xml, Docker manifests, etc.).
|
||||
# cache:
|
||||
# immutable_ttl: TTL for immutable files (0 = forever; rarely needs changing).
|
||||
# mutable_ttl: TTL in seconds for mutable files. Omit to use the default (3600).
|
||||
#
|
||||
# WARNING: this file may contain credentials — do not commit real values.
|
||||
#
|
||||
# Global configuration
|
||||
#s3:
|
||||
# endpoint: "localhost:9000"
|
||||
# access_key: "minioadmin"
|
||||
# secret_key: "minioadmin"
|
||||
# bucket: "artifacts"
|
||||
# secure: false
|
||||
#
|
||||
#redis:
|
||||
# url: "redis://localhost:6379/0"
|
||||
#
|
||||
#database:
|
||||
# url: "postgresql://artifacts:artifacts123@localhost:5432/artifacts"
|
||||
#
|
||||
remotes:
|
||||
github:
|
||||
base_url: "https://github.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "GitHub releases and files"
|
||||
immutable_patterns:
|
||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||
- "lxc/incus/.*\\.tar\\.gz$"
|
||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
- "VictoriaMetrics/VictoriaMetrics/.*/vmutils-linux-amd64-.*\\.tar\\.gz$"
|
||||
- "VictoriaMetrics/VictoriaMetrics/.*/victoria-metrics-linux-amd64-.*-cluster\\.tar\\.gz$"
|
||||
- "VictoriaMetrics/VictoriaMetrics/.*/victoria-logs-linux-amd64-.*\\.tar\\.gz$"
|
||||
- "VictoriaMetrics/VictoriaMetrics/.*/vlutils-linux-amd64-.*\\.tar\\.gz$"
|
||||
- "prometheus-community/bind_exporter/.*/bind_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
- "prometheus-community/pgbouncer_exporter/.*/pgbouncer_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
- "prometheus-community/postgres_exporter/.*/postgres_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
- "onedr0p/exportarr/.*/exportarr_.*_linux_amd64\\.tar\\.gz$"
|
||||
- "tynany/frr_exporter/.*/frr_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
- "camptocamp/prometheus-puppetdb-exporter/.*/prometheus-puppetdb-exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
- "grafana/jsonnet-language-server/.*/jsonnet-language-server_.*_linux_amd64$"
|
||||
- "helmfile/helmfile/.*/helmfile_.*_linux_amd64\\.tar\\.gz$"
|
||||
- "helmfile/vals/.*/vals_.*_linux_amd64\\.tar\\.gz$"
|
||||
- "openbao/openbao-plugins/.*/openbao-plugin-secrets-consul_linux_amd64_.*\\.tar\\.gz$"
|
||||
- "openbao/openbao-plugins/.*/openbao-plugin-secrets-nomad_linux_amd64_.*\\.tar\\.gz$"
|
||||
- "apple/foundationdb/.*/libfdb_c\\.x86_64\\.so$"
|
||||
- "stalwartlabs/stalwart/.*/stalwart-cli-x86_64-unknown-linux-gnu\\.tar\\.gz$"
|
||||
- "stalwartlabs/stalwart/.*/stalwart-foundationdb-x86_64-unknown-linux-gnu\\.tar\\.gz$"
|
||||
- "stalwartlabs/stalwart/.*/stalwart-x86_64-unknown-linux-gnu\\.tar\\.gz$"
|
||||
cache:
|
||||
immutable_ttl: 0 # Files cached indefinitely
|
||||
mutable_ttl: 0
|
||||
|
||||
github-archive:
|
||||
base_url: "https://github.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "GitHub repository archive tarballs"
|
||||
immutable_patterns:
|
||||
# Tag archives are immutable — a tag never changes
|
||||
- ".*/archive/refs/tags/.*\\.tar\\.gz$"
|
||||
mutable_patterns:
|
||||
# Branch archives can change on every push
|
||||
- ".*/archive/refs/heads/main\\.tar\\.gz$"
|
||||
- ".*/archive/refs/heads/master\\.tar\\.gz$"
|
||||
# Before re-downloading an expired branch archive, check whether it has
|
||||
# actually changed (304 Not Modified → just refresh the TTL, no transfer).
|
||||
# Only applies to user-defined mutable_patterns, not package-type defaults.
|
||||
check_mutable_updates: true
|
||||
cache:
|
||||
immutable_ttl: 0 # Tag archives cached indefinitely
|
||||
mutable_ttl: 86400 # Branch archives refreshed after 1 day
|
||||
|
||||
gitea-dl:
|
||||
base_url: "https://dl.gitea.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "Gitea download site"
|
||||
immutable_patterns:
|
||||
- "act_runner/.*/act_runner-.*-linux-amd64$"
|
||||
cache:
|
||||
immutable_ttl: 0 # Files cached indefinitely
|
||||
mutable_ttl: 0
|
||||
|
||||
hashicorp-releases:
|
||||
base_url: "https://releases.hashicorp.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "HashiCorp product releases"
|
||||
immutable_patterns:
|
||||
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
||||
- "terraform/.*terraform_.*_windows_amd64\\.zip$"
|
||||
- "terraform/.*terraform_.*_darwin_amd64\\.zip$"
|
||||
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
||||
- "vault/.*vault_.*_windows_amd64\\.zip$"
|
||||
- "vault/.*vault_.*_darwin_amd64\\.zip$"
|
||||
- "consul-cni/.*/consul-cni_.*_linux_amd64\\.zip$"
|
||||
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
||||
- "nomad-autoscaler/.*/nomad-autoscaler_.*_linux_amd64\\.zip$"
|
||||
- "nomad/.*/nomad_.*_linux_amd64\\.zip$"
|
||||
- "packer/.*/packer_.*_linux_amd64\\.zip$"
|
||||
cache:
|
||||
immutable_ttl: 0 # Files cached indefinitely
|
||||
mutable_ttl: 0
|
||||
|
||||
alpine:
|
||||
base_url: "https://dl-cdn.alpinelinux.org"
|
||||
type: "remote"
|
||||
package: "alpine"
|
||||
description: "Alpine Linux APK package repository"
|
||||
immutable_patterns:
|
||||
- ".*/x86_64/.*\\.apk$"
|
||||
# check_mutable_updates not set: APKINDEX.tar.gz is a package-type default
|
||||
# and is always re-fetched on expiry — conditional checks are skipped for
|
||||
# built-in mutable patterns regardless of this flag.
|
||||
cache:
|
||||
immutable_ttl: 0 # Files cached indefinitely
|
||||
mutable_ttl: 7200 # Index files (APKINDEX.tar.gz) cached for 2 hours
|
||||
|
||||
almalinux:
|
||||
base_url: "https://gsl-syd.mm.fcix.net/almalinux"
|
||||
type: "remote"
|
||||
package: "rpm"
|
||||
description: "AlmaLinux RPM package repository"
|
||||
immutable_patterns:
|
||||
- ".*/x86_64/.*\\.rpm$"
|
||||
- ".*/noarch/.*\\.rpm$"
|
||||
- ".*/repodata/.*$"
|
||||
- ".*\\.rpm$" # Allow all RPM files
|
||||
# repomd.xml / repodata are package-type defaults — always re-fetched on
|
||||
# expiry. check_mutable_updates would only apply to any custom
|
||||
# mutable_patterns added here.
|
||||
cache:
|
||||
immutable_ttl: 0 # Files cached indefinitely
|
||||
mutable_ttl: 7200 # Metadata files cached for 2 hours
|
||||
|
||||
epel:
|
||||
base_url: "http://mirror.aarnet.edu.au/pub/epel"
|
||||
type: "remote"
|
||||
package: "rpm"
|
||||
description: "EPEL (Extra Packages for Enterprise Linux)"
|
||||
immutable_patterns:
|
||||
- "8/Everything/x86_64/.*\\.rpm$"
|
||||
- "9/Everything/x86_64/.*\\.rpm$"
|
||||
- "10/Everything/x86_64/.*\\.rpm$"
|
||||
- ".*/noarch/.*\\.rpm$"
|
||||
- ".*/repodata/.*$"
|
||||
cache:
|
||||
immutable_ttl: 0 # Files cached indefinitely
|
||||
mutable_ttl: 7200 # Metadata files cached for 2 hours
|
||||
|
||||
fedora:
|
||||
base_url: "https://gsl-syd.mm.fcix.net/fedora/linux"
|
||||
type: "remote"
|
||||
package: "rpm"
|
||||
description: "Fedora Linux RPM package repository"
|
||||
immutable_patterns:
|
||||
- "releases/.*/Everything/x86_64/.*\\.rpm$"
|
||||
- "updates/.*/Everything/x86_64/.*\\.rpm$"
|
||||
- "development/.*/Everything/x86_64/.*\\.rpm$"
|
||||
- ".*/noarch/.*\\.rpm$"
|
||||
- "updates/.*/Everything/x86_64/repodata/.*$"
|
||||
cache:
|
||||
immutable_ttl: 0 # Files cached indefinitely
|
||||
mutable_ttl: 300 # Metadata files cached for 5 minutes
|
||||
|
||||
ghcr:
|
||||
base_url: "https://ghcr.io"
|
||||
type: "remote"
|
||||
package: "docker"
|
||||
description: "GitHub Container Registry"
|
||||
# username: "your-github-username"
|
||||
# password: "your-github-pat" # needs read:packages scope
|
||||
# Docker manifest/tag-list patterns are package-type defaults — always
|
||||
# re-fetched on expiry. check_mutable_updates only applies to any custom
|
||||
# mutable_patterns you add (e.g. a metadata endpoint).
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 300
|
||||
|
||||
dockerhub:
|
||||
base_url: "https://registry-1.docker.io"
|
||||
type: "remote"
|
||||
package: "docker"
|
||||
description: "Docker Hub registry"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 300
|
||||
|
||||
pypi:
|
||||
base_url: "https://pypi.org"
|
||||
type: "remote"
|
||||
package: "pypi"
|
||||
description: "Python Package Index — simple repository API"
|
||||
# pypi_files_url: the upstream host used in simple-index hrefs (default: files.pythonhosted.org)
|
||||
# pypi_files_remote: our proxy remote that will serve those files (default: pypi-files)
|
||||
pypi_files_url: "https://files.pythonhosted.org"
|
||||
pypi_files_remote: "pypi-files"
|
||||
check_mutable_updates: true
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600 # Simple index pages refreshed after 10 minutes
|
||||
|
||||
pypi-gitea:
|
||||
base_url: "https://gitea.example.com/api/packages/myorg/pypi"
|
||||
type: "remote"
|
||||
package: "pypi"
|
||||
description: "Private Gitea PyPI registry"
|
||||
# username: "your-gitea-username"
|
||||
# password: "your-personal-access-token" # needs package:read scope
|
||||
# Files are served from the same Gitea instance — rewrite back to this same remote
|
||||
pypi_files_url: "https://gitea.example.com/api/packages/myorg/pypi"
|
||||
pypi_files_remote: "pypi-gitea"
|
||||
check_mutable_updates: true
|
||||
immutable_patterns:
|
||||
- "files/.*\\.whl$"
|
||||
- "files/.*\\.whl\\.metadata$"
|
||||
- "files/.*\\.tar\\.gz$"
|
||||
- "files/.*\\.zip$"
|
||||
- "files/.*\\.egg$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600
|
||||
|
||||
pypi-files:
|
||||
base_url: "https://files.pythonhosted.org"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "Python Package Index — file storage (wheels, sdists)"
|
||||
immutable_patterns:
|
||||
- "packages/.*\\.whl$"
|
||||
- "packages/.*\\.whl\\.metadata$"
|
||||
- "packages/.*\\.tar\\.gz$"
|
||||
- "packages/.*\\.zip$"
|
||||
- "packages/.*\\.egg$"
|
||||
cache:
|
||||
immutable_ttl: 0 # Package files are content-addressed — cache forever
|
||||
|
||||
npm:
|
||||
base_url: "https://registry.npmjs.org"
|
||||
type: "remote"
|
||||
package: "npm"
|
||||
description: "npm registry — package metadata with tarball URL rewriting"
|
||||
# npm_files_url: the upstream host used in metadata tarball hrefs (default: https://registry.npmjs.org)
|
||||
# npm_files_remote: our proxy remote that will serve those tarballs (default: npm-files)
|
||||
npm_files_url: "https://registry.npmjs.org"
|
||||
npm_files_remote: "npm"
|
||||
check_mutable_updates: true
|
||||
immutable_patterns:
|
||||
- \.tgz$
|
||||
mutable_patterns:
|
||||
- ^(?!.*\.tgz$).*
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600 # Package metadata refreshed after 10 minutes
|
||||
|
||||
hashicorp-helm:
|
||||
base_url: "https://helm.releases.hashicorp.com"
|
||||
type: "remote"
|
||||
package: "helm"
|
||||
description: "HashiCorp Helm chart repository (Vault, Consul, Nomad, etc.)"
|
||||
check_mutable_updates: true
|
||||
immutable_patterns:
|
||||
- "\\.tgz$"
|
||||
cache:
|
||||
immutable_ttl: 0 # Chart tarballs are versioned — cache forever
|
||||
mutable_ttl: 3600 # index.yaml refreshed after 1 hour
|
||||
|
||||
local-generic:
|
||||
type: "local"
|
||||
package: "generic"
|
||||
description: "Local generic file repository"
|
||||
cache:
|
||||
immutable_ttl: 0 # Files cached indefinitely
|
||||
mutable_ttl: 0
|
||||
+21
-64
@@ -1,7 +1,5 @@
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
|
||||
import hashlib
|
||||
import redis
|
||||
|
||||
|
||||
@@ -19,20 +17,24 @@ class RedisCache:
|
||||
self.client = None
|
||||
self.available = False
|
||||
|
||||
def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
|
||||
"""Return True if file_path matches any of the mutable patterns."""
|
||||
if patterns is None:
|
||||
patterns = []
|
||||
return any(re.search(p, file_path) for p in patterns)
|
||||
def is_index_file(self, file_path: str) -> bool:
    """Tell whether ``file_path`` names a repository index file (one that gets a TTL).

    A path qualifies when it ends with ``APKINDEX.tar.gz``, ``Packages.gz`` or
    ``repomd.xml``, or when it contains ``repodata/`` and ends with one of the
    ``.xml`` / ``.xml.gz`` / ``.xml.bz2`` / ``.xml.xz`` suffixes.
    """
    # Well-known index file names are recognised by suffix alone.
    if file_path.endswith(("APKINDEX.tar.gz", "Packages.gz", "repomd.xml")):
        return True

    # Otherwise only XML metadata living under a repodata/ directory counts.
    xml_suffixes = (".xml", ".xml.gz", ".xml.bz2", ".xml.xz")
    return "repodata/" in file_path and file_path.endswith(xml_suffixes)
|
||||
|
||||
def get_index_cache_key(self, remote_name: str, path: str) -> str:
    """Build the Redis key that tracks the TTL of an index file.

    The key has the form ``index:<remote_name>:<digest>``, where ``<digest>`` is
    the first 16 hex characters of the SHA-256 of ``path``.
    """
    path_digest = hashlib.sha256(path.encode()).hexdigest()
    short_digest = path_digest[:16]
    return f"index:{remote_name}:{short_digest}"
|
||||
|
||||
def get_mutable_meta_key(self, remote_name: str, path: str) -> str:
    """Build the Redis key under which a mutable file's metadata is stored.

    The key has the form ``mutable:meta:<remote_name>:<digest>``, where
    ``<digest>`` is the first 16 hex characters of the SHA-256 of ``path``.
    """
    path_digest = hashlib.sha256(path.encode()).hexdigest()
    short_digest = path_digest[:16]
    return f"mutable:meta:{remote_name}:{short_digest}"
|
||||
|
||||
def is_index_valid(self, remote_name: str, path: str) -> bool:
|
||||
"""Check if mutable file is still within its TTL window."""
|
||||
def is_index_valid(
|
||||
self, remote_name: str, path: str, ttl_override: int = None
|
||||
) -> bool:
|
||||
"""Check if index file is still valid (not expired)"""
|
||||
if not self.available:
|
||||
return False
|
||||
|
||||
@@ -43,7 +45,7 @@ class RedisCache:
|
||||
return False
|
||||
|
||||
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
|
||||
"""Set or refresh the TTL key for a mutable file."""
|
||||
"""Mark index file as cached with TTL"""
|
||||
if not self.available:
|
||||
return
|
||||
|
||||
@@ -53,60 +55,15 @@ class RedisCache:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None:
|
||||
"""Persist ETag and Last-Modified for future conditional requests."""
|
||||
if not self.available:
|
||||
return
|
||||
data = {}
|
||||
if etag:
|
||||
data["etag"] = etag
|
||||
if last_modified:
|
||||
data["last_modified"] = last_modified
|
||||
if not data:
|
||||
return
|
||||
try:
|
||||
self.client.hset(self.get_mutable_meta_key(remote_name, path), mapping=data)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def get_mutable_meta(self, remote_name: str, path: str) -> dict:
    """Fetch the stored validators (ETag / Last-Modified) of a mutable file.

    Returns:
        The Redis hash stored for the file, or ``{}`` when the cache is
        unavailable, nothing is stored, or Redis raises.
    """
    if self.available:
        try:
            stored = self.client.hgetall(self.get_mutable_meta_key(remote_name, path))
            if stored:
                return stored
        except Exception:
            # Best effort: treat any Redis failure as "no metadata".
            pass
    return {}
|
||||
|
||||
def delete_mutable_meta(self, remote_name: str, path: str) -> None:
    """Drop the stored validators of a mutable file from Redis.

    Does nothing when the cache is unavailable; Redis errors are swallowed.
    """
    if self.available:
        try:
            meta_key = self.get_mutable_meta_key(remote_name, path)
            self.client.delete(meta_key)
        except Exception:
            # Best effort: a leftover metadata key is harmless.
            pass
|
||||
|
||||
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
|
||||
"""Remove an expired mutable file from S3 and clear its Redis meta."""
|
||||
"""Remove expired index from S3 storage"""
|
||||
if not self.available:
|
||||
return
|
||||
|
||||
try:
|
||||
import os
|
||||
|
||||
from .config import ConfigManager
|
||||
|
||||
config_path = os.environ.get("CONFIG_PATH")
|
||||
if config_path:
|
||||
config = ConfigManager(config_path)
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if remote_config:
|
||||
base_url = remote_config.get("base_url")
|
||||
if base_url:
|
||||
s3_key = storage.get_object_key(remote_name, path)
|
||||
if storage.exists(s3_key):
|
||||
storage.client.delete_object(Bucket=storage.bucket, Key=s3_key)
|
||||
# Get the S3 key and remove it
|
||||
s3_key = storage.get_object_key_from_path(remote_name, path)
|
||||
if storage.exists(s3_key):
|
||||
storage.client.delete_object(Bucket=storage.bucket, Key=s3_key)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self.delete_mutable_meta(remote_name, path)
|
||||
|
||||
+20
-61
@@ -1,32 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
import json
|
||||
import yaml
|
||||
|
||||
# Built-in mutable-file regexes, keyed by a remote's ``package`` value.
# Package types with no built-in mutable files map to an empty list.
_PACKAGE_MUTABLE_PATTERNS: dict[str, list[str]] = dict(
    alpine=[r"APKINDEX\.tar\.gz$"],
    rpm=[
        r"repomd\.xml$",
        # Everything under repodata/ with one of the listed metadata extensions.
        (
            r"repodata/.*\.("
            r"xml|xml\.gz|xml\.bz2|xml\.xz|xml\.zck|xml\.zst"
            r"|sqlite|sqlite\.gz|sqlite\.bz2|sqlite\.xz|sqlite\.zck|sqlite\.zst"
            r"|yaml\.xz|yaml\.gz|yaml\.bz2|yaml\.zst|asc|txt"
            r")$"
        ),
        r"Packages\.gz$",
    ],
    # Tag-addressed manifests (anything not pinned by sha256 digest) and tag lists.
    docker=[r"/manifests/(?!sha256:)[^/]+$", r"/tags/list$"],
    # Per-package and top-level simple index pages.
    pypi=[r"simple/"],
    npm=[],
    helm=[r"index\.yaml$"],
    generic=[],
)
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class ConfigManager:
|
||||
@@ -37,8 +12,10 @@ class ConfigManager:
|
||||
|
||||
def _load_config(self) -> dict:
|
||||
try:
|
||||
with open(self.config_file) as f:
|
||||
if self.config_file.endswith(".yaml") or self.config_file.endswith(".yml"):
|
||||
with open(self.config_file, "r") as f:
|
||||
if self.config_file.endswith(".yaml") or self.config_file.endswith(
|
||||
".yml"
|
||||
):
|
||||
return yaml.safe_load(f)
|
||||
else:
|
||||
return json.load(f)
|
||||
@@ -58,25 +35,30 @@ class ConfigManager:
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def get_remote_config(self, remote_name: str) -> dict | None:
|
||||
def get_remote_config(self, remote_name: str) -> Optional[dict]:
|
||||
self._check_reload()
|
||||
return self.config.get("remotes", {}).get(remote_name)
|
||||
|
||||
def get_immutable_patterns(self, remote_name: str, repo_path: str = "") -> list[str]:
|
||||
def get_repository_patterns(self, remote_name: str, repo_path: str) -> list:
|
||||
remote_config = self.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
return []
|
||||
|
||||
repositories = remote_config.get("repositories", {})
|
||||
|
||||
# Handle both dict (GitHub style) and list (Alpine style) repositories
|
||||
if isinstance(repositories, dict):
|
||||
repo_config = repositories.get(repo_path)
|
||||
if repo_config:
|
||||
patterns = repo_config.get("immutable_patterns", [])
|
||||
patterns = repo_config.get("include_patterns", [])
|
||||
else:
|
||||
patterns = remote_config.get("immutable_patterns", [])
|
||||
patterns = remote_config.get("include_patterns", [])
|
||||
elif isinstance(repositories, list):
|
||||
# For Alpine, repositories is just a list of allowed repo names
|
||||
# Pattern matching is handled by the main include_patterns
|
||||
patterns = remote_config.get("include_patterns", [])
|
||||
else:
|
||||
patterns = remote_config.get("immutable_patterns", [])
|
||||
patterns = remote_config.get("include_patterns", [])
|
||||
|
||||
return patterns
|
||||
|
||||
@@ -110,7 +92,9 @@ class ConfigManager:
|
||||
if not redis_url:
|
||||
raise ValueError("REDIS_URL environment variable is required")
|
||||
|
||||
return {"url": redis_url}
|
||||
return {
|
||||
"url": redis_url
|
||||
}
|
||||
|
||||
def get_database_config(self) -> dict:
|
||||
"""Get database configuration from environment variables"""
|
||||
@@ -121,37 +105,12 @@ class ConfigManager:
|
||||
db_name = os.getenv("DBNAME")
|
||||
|
||||
if not all([db_host, db_port, db_user, db_pass, db_name]):
|
||||
missing = [
|
||||
var
|
||||
for var, val in [("DBHOST", db_host), ("DBPORT", db_port), ("DBUSER", db_user), ("DBPASS", db_pass), ("DBNAME", db_name)]
|
||||
if not val
|
||||
]
|
||||
missing = [var for var, val in [("DBHOST", db_host), ("DBPORT", db_port), ("DBUSER", db_user), ("DBPASS", db_pass), ("DBNAME", db_name)] if not val]
|
||||
raise ValueError(f"All database environment variables are required: {', '.join(missing)}")
|
||||
|
||||
db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}"
|
||||
return {"url": db_url}
|
||||
|
||||
def get_user_mutable_patterns(self, remote_name: str) -> list[str]:
    """Return only the user-configured ``mutable_patterns`` of a remote.

    Package-type default patterns are deliberately not included.

    Args:
        remote_name: Name of the remote as it appears under ``remotes``.

    Returns:
        The configured pattern list, or ``[]`` when the remote is unknown,
        the key is absent, or the key is present but empty/null in the config.
    """
    remote_config = self.get_remote_config(remote_name)
    if not remote_config:
        return []
    # A bare ``mutable_patterns:`` key in YAML loads as None; normalise it so
    # callers always receive a list.
    return remote_config.get("mutable_patterns") or []
|
||||
|
||||
def get_mutable_patterns(self, remote_name: str) -> list[str]:
    """Return every mutable-file pattern that applies to a remote.

    The result is the package-type defaults (looked up by the remote's
    ``package`` value, falling back to ``generic``) followed by any extra
    patterns listed under ``mutable_patterns`` in the remote's config that
    are not already among the defaults. The TTL applied to matching files is
    configured separately, in the remote's ``cache`` section.

    Args:
        remote_name: Name of the remote as it appears under ``remotes``.

    Returns:
        A new list of regex strings; ``[]`` when the remote is unknown.
    """
    remote_config = self.get_remote_config(remote_name)
    if not remote_config:
        return []
    package = remote_config.get("package", "generic")
    defaults = _PACKAGE_MUTABLE_PATTERNS.get(package, [])
    # A bare ``mutable_patterns:`` key in YAML loads as None; treat it as empty
    # instead of failing when iterating below.
    extra = remote_config.get("mutable_patterns") or []
    # Concatenation builds a fresh list, so the shared defaults are never mutated.
    return defaults + [p for p in extra if p not in defaults]
|
||||
|
||||
def get_cache_config(self, remote_name: str) -> dict:
|
||||
"""Get cache configuration for a specific remote"""
|
||||
remote_config = self.get_remote_config(remote_name)
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
@@ -52,15 +54,25 @@ class DatabaseManager:
|
||||
""")
|
||||
|
||||
# Create indexes separately
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_s3_key ON local_files (s3_key)")
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)"
|
||||
)
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)"
|
||||
)
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)"
|
||||
)
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_local_s3_key ON local_files (s3_key)"
|
||||
)
|
||||
print("Database schema initialized")
|
||||
except Exception as e:
|
||||
print(f"Error creating schema: {e}")
|
||||
|
||||
def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int):
|
||||
def record_artifact_mapping(
|
||||
self, s3_key: str, remote_name: str, file_path: str, size_bytes: int
|
||||
):
|
||||
"""Record mapping between S3 key and remote"""
|
||||
if not self.available:
|
||||
return
|
||||
@@ -100,7 +112,7 @@ class DatabaseManager:
|
||||
print(f"Error getting storage by remote: {e}")
|
||||
return {}
|
||||
|
||||
def get_remote_for_s3_key(self, s3_key: str) -> str | None:
|
||||
def get_remote_for_s3_key(self, s3_key: str) -> Optional[str]:
|
||||
"""Get remote name for given S3 key"""
|
||||
if not self.available:
|
||||
return None
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
# Module-scoped logger for the Docker registry auth helpers.
logger = logging.getLogger(__name__)

# Process-local Bearer token cache: cache key -> (token, expires_at timestamp).
_token_cache: dict[str, tuple[str, float]] = {}

# Matches a ``WWW-Authenticate: Bearer`` challenge. ``realm`` is mandatory;
# ``service`` and ``scope`` are optional but must follow in exactly that order,
# separated by a comma with no surrounding whitespace.
_WWW_AUTH_RE = re.compile(
    "".join(
        (
            r'Bearer\s+realm="(?P<realm>[^"]+)"',
            r'(?:,service="(?P<service>[^"]*)")?',
            r'(?:,scope="(?P<scope>[^"]*)")?',
        )
    ),
    flags=re.IGNORECASE,
)
|
||||
|
||||
|
||||
def _cache_key(realm: str, service: str, scope: str, username: str | None) -> str:
|
||||
return f"{realm}|{service}|{scope}|{username or ''}"
|
||||
|
||||
|
||||
def _get_cached_token(key: str) -> str | None:
    """Look up a still-valid token in the in-memory cache.

    Returns the token when an entry exists and its expiry lies in the future.
    Otherwise any entry stored under ``key`` is evicted and ``None`` is returned.
    """
    cached = _token_cache.get(key)
    if cached:
        if cached[1] > time.time():
            return cached[0]
    # Missing or expired: make sure no stale entry lingers.
    _token_cache.pop(key, None)
    return None
|
||||
|
||||
|
||||
def _store_token(key: str, token: str, expires_in: int) -> None:
    """Cache ``token`` under ``key`` with an expiry derived from ``expires_in`` seconds.

    The cached lifetime is 30 seconds shorter than ``expires_in`` so a token is
    not used right as it expires, but never shorter than 10 seconds.
    """
    lifetime = max(expires_in - 30, 10)
    _token_cache[key] = (token, time.time() + lifetime)
|
||||
|
||||
|
||||
async def fetch_token(
    realm: str,
    service: str,
    scope: str,
    username: str | None = None,
    password: str | None = None,
) -> str | None:
    """Fetch a Bearer token from a Docker registry auth server.

    A previously fetched, still valid token for the same realm/service/scope/
    username is returned from the in-memory cache without a network call.

    Args:
        realm: URL of the token endpoint.
        service: Value for the ``service`` query parameter; omitted when empty.
        scope: Value for the ``scope`` query parameter; omitted when empty.
        username: Optional user for HTTP basic auth against the token endpoint.
        password: Optional password; basic auth is only sent when both
            *username* and *password* are set.

    Returns:
        The token string, or ``None`` when the request fails or the response
        does not carry a usable token. Failures are logged, never raised.
    """
    key = _cache_key(realm, service, scope, username)
    cached = _get_cached_token(key)
    if cached:
        return cached

    params: dict[str, str] = {}
    if service:
        params["service"] = service
    if scope:
        params["scope"] = scope

    auth = (username, password) if username and password else None

    try:
        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.get(realm, params=params, auth=auth)
            response.raise_for_status()
            data = response.json()
    except Exception as e:
        logger.warning(f"Docker token fetch failed ({realm}): {e}")
        return None

    # A JSON array/string/number has no .get(); treat it as a failed fetch
    # instead of letting AttributeError escape to the caller.
    if not isinstance(data, dict):
        logger.warning(f"Docker token response is not a JSON object ({realm})")
        return None

    token = data.get("token") or data.get("access_token")
    if not token:
        # Log only the field names: the body comes from an auth server and may
        # carry other credentials.
        logger.warning(f"Docker token response missing token field (fields: {sorted(data)})")
        return None

    # ``expires_in`` is optional and may be null or malformed; fall back to the
    # default lifetime rather than raising after a successful fetch.
    raw_expires_in = data.get("expires_in")
    try:
        expires_in = int(raw_expires_in) if raw_expires_in is not None else 300
    except (TypeError, ValueError):
        expires_in = 300

    _store_token(key, token, expires_in)
    logger.debug(f"Docker token obtained (realm={realm}, service={service}, scope={scope}, expires_in={expires_in}s)")
    return token
|
||||
|
||||
|
||||
def parse_www_authenticate(header: str) -> tuple[str, str, str] | None:
|
||||
"""Parse WWW-Authenticate: Bearer header. Returns (realm, service, scope) or None."""
|
||||
m = _WWW_AUTH_RE.search(header)
|
||||
if not m:
|
||||
return None
|
||||
return m.group("realm"), m.group("service") or "", m.group("scope") or ""
|
||||
|
||||
|
||||
async def get_docker_token_for_response(
    www_authenticate: str,
    username: str | None = None,
    password: str | None = None,
) -> str | None:
    """Turn a WWW-Authenticate header value into a Bearer token.

    Returns ``None`` when the header cannot be parsed as a Bearer challenge;
    otherwise returns whatever ``fetch_token`` yields for the parsed
    realm, service and scope.
    """
    challenge = parse_www_authenticate(www_authenticate)
    if challenge:
        # challenge is the (realm, service, scope) triple.
        return await fetch_token(*challenge, username, password)
    return None
|
||||
+157
-428
@@ -1,31 +1,18 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
import hashlib
|
||||
from typing import Dict, Any, Optional
|
||||
import httpx
|
||||
from fastapi import FastAPI, File, HTTPException, Query, Request, Response, UploadFile
|
||||
from fastapi.responses import JSONResponse, PlainTextResponse
|
||||
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
|
||||
from fastapi import FastAPI, HTTPException, Response, Query, File, UploadFile
|
||||
from fastapi.responses import PlainTextResponse, JSONResponse
|
||||
from pydantic import BaseModel
|
||||
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
||||
|
||||
try:
|
||||
from importlib.metadata import version
|
||||
|
||||
__version__ = version("artifactapi")
|
||||
except ImportError:
|
||||
# Fallback for development when package isn't installed
|
||||
__version__ = "dev"
|
||||
|
||||
from .cache import RedisCache
|
||||
from .config import ConfigManager
|
||||
from .database import DatabaseManager
|
||||
from .docker_auth import get_docker_token_for_response
|
||||
from .metrics import MetricsManager
|
||||
from .storage import S3Storage
|
||||
from .cache import RedisCache
|
||||
from .metrics import MetricsManager
|
||||
|
||||
|
||||
class ArtifactRequest(BaseModel):
|
||||
@@ -33,15 +20,7 @@ class ArtifactRequest(BaseModel):
|
||||
include_pattern: str
|
||||
|
||||
|
||||
class UpstreamUnreachable(Exception):
    """Signal that the upstream backend could not be contacted.

    Used for network-level failures and timeouts.
    """
|
||||
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = FastAPI(title="Artifact Storage API", version=__version__)
|
||||
app = FastAPI(title="Artifact Storage API", version="2.0.0")
|
||||
|
||||
# Initialize components using config
|
||||
config_path = os.environ.get("CONFIG_PATH")
|
||||
@@ -66,7 +45,7 @@ def read_root():
|
||||
config._check_reload()
|
||||
return {
|
||||
"message": "Artifact Storage API",
|
||||
"version": app.version,
|
||||
"version": "2.0.0",
|
||||
"remotes": list(config.config.get("remotes", {}).keys()),
|
||||
}
|
||||
|
||||
@@ -76,107 +55,31 @@ def health_check():
|
||||
return {"status": "healthy"}
|
||||
|
||||
|
||||
def _redis_flush_patterns(remote: str | None, cache_type: str) -> list[str]:
    """Return the Redis key globs selected by *cache_type*, scoped to *remote* when given."""
    patterns: list[str] = []
    if cache_type in ("all", "index"):
        if remote:
            patterns += [f"index:{remote}:*", f"mutable:meta:{remote}:*"]
        else:
            patterns += ["index:*", "mutable:meta:*"]
    if cache_type in ("all", "metrics"):
        patterns.append(f"metrics:*:{remote}" if remote else "metrics:*")
    return patterns


@app.put("/cache/flush")
def flush_cache(
    remote: str | None = Query(default=None, description="Specific remote to flush (optional)"),
    cache_type: str = Query(default="all", description="Type to flush: 'all', 'index', 'files', 'metrics'"),
):
    """Flush cache entries for specified remote or all remotes.

    ``index`` and ``metrics`` remove matching Redis keys, ``files`` removes
    cached S3 objects, and ``all`` does both. An unrecognised *cache_type*
    flushes nothing.

    Returns:
        A dict echoing ``remote`` and ``cache_type`` plus a ``flushed`` summary
        with ``redis_keys``, ``s3_objects`` and human-readable ``operations``.

    Raises:
        HTTPException: 500 when the flush fails outside the S3 step. S3 errors
            are reported in ``operations`` instead of being raised.
    """
    try:
        flushed = {"redis_keys": 0, "s3_objects": 0, "operations": []}
        result = {"remote": remote, "cache_type": cache_type, "flushed": flushed}

        # Flush Redis entries based on cache_type
        if cache_type in ("all", "index", "metrics") and cache.available and cache.client:
            for pattern in _redis_flush_patterns(remote, cache_type):
                # NOTE(review): KEYS walks the whole keyspace in one blocking
                # call; consider SCAN if the keyspace is large.
                keys = cache.client.keys(pattern)
                if keys:
                    cache.client.delete(*keys)
                    flushed["redis_keys"] += len(keys)
                    logger.info(f"Cache flush: Deleted {len(keys)} Redis keys matching '{pattern}'")

            if flushed["redis_keys"] > 0:
                flushed["operations"].append(f"Deleted {flushed['redis_keys']} Redis keys")

        # Flush S3 objects if requested
        if cache_type in ("all", "files"):
            try:
                # Use prefix filtering for remote-specific deletion
                list_params = {"Bucket": storage.bucket}
                if remote:
                    list_params["Prefix"] = f"{remote}/"

                failed = 0
                # list_objects_v2 returns at most one page per call, so follow
                # the continuation token until the listing is exhausted.
                while True:
                    response = storage.client.list_objects_v2(**list_params)
                    for obj in response.get("Contents", []):
                        key = obj["Key"]
                        try:
                            storage.client.delete_object(Bucket=storage.bucket, Key=key)
                            flushed["s3_objects"] += 1
                        except Exception as e:
                            failed += 1
                            logger.warning(f"Failed to delete S3 object {key}: {e}")

                    next_token = response.get("NextContinuationToken")
                    if not response.get("IsTruncated") or not next_token:
                        break
                    list_params["ContinuationToken"] = next_token

                scope = f" for remote '{remote}'" if remote else ""
                if flushed["s3_objects"] or failed:
                    # Report what was actually deleted, not what was listed.
                    flushed["operations"].append(f"Deleted {flushed['s3_objects']} S3 objects{scope}")
                    logger.info(f"Cache flush: Deleted {flushed['s3_objects']} S3 objects{scope}")
                if failed:
                    flushed["operations"].append(f"Failed to delete {failed} S3 objects{scope}")

            except Exception as e:
                flushed["operations"].append(f"S3 flush failed: {str(e)}")
                logger.error(f"Cache flush S3 error: {e}")

        if not flushed["operations"]:
            flushed["operations"].append("No cache entries found to flush")

        return result

    except Exception as e:
        logger.error(f"Cache flush error: {e}")
        raise HTTPException(status_code=500, detail=f"Cache flush failed: {str(e)}")
|
||||
|
||||
|
||||
async def construct_remote_url(remote_name: str, path: str) -> str:
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||
)
|
||||
|
||||
base_url = remote_config.get("base_url")
|
||||
if not base_url:
|
||||
raise HTTPException(status_code=500, detail=f"No base_url configured for remote '{remote_name}'")
|
||||
|
||||
# Handle Docker registry URLs
|
||||
if remote_config.get("package") == "docker":
|
||||
# Convert Docker paths to v2 API format
|
||||
# e.g., library/nginx/manifests/latest -> v2/library/nginx/manifests/latest
|
||||
return f"{base_url}/v2/{path}"
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"No base_url configured for remote '{remote_name}'"
|
||||
)
|
||||
|
||||
return f"{base_url}/{path}"
|
||||
|
||||
|
||||
async def check_artifact_patterns(remote_name: str, repo_path: str, file_path: str, full_path: str) -> bool:
|
||||
# Mutable files (index files) are always allowed through
|
||||
mutable_patterns = config.get_mutable_patterns(remote_name)
|
||||
if cache.is_mutable_file(file_path, mutable_patterns) or cache.is_mutable_file(full_path, mutable_patterns):
|
||||
async def check_artifact_patterns(
|
||||
remote_name: str, repo_path: str, file_path: str, full_path: str
|
||||
) -> bool:
|
||||
# First check if this is an index file - always allow index files
|
||||
if cache.is_index_file(file_path) or cache.is_index_file(full_path):
|
||||
return True
|
||||
|
||||
# Check immutable include patterns
|
||||
patterns = config.get_immutable_patterns(remote_name, repo_path)
|
||||
# Then check basic include patterns
|
||||
patterns = config.get_repository_patterns(remote_name, repo_path)
|
||||
if not patterns:
|
||||
return True # Allow all if no patterns configured
|
||||
|
||||
@@ -190,15 +93,18 @@ async def check_artifact_patterns(remote_name: str, repo_path: str, file_path: s
|
||||
if not pattern_matched:
|
||||
return False
|
||||
|
||||
# All remotes now use pattern-based filtering only - no additional checks needed
|
||||
return True
|
||||
|
||||
|
||||
async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
|
||||
# Use hierarchical path-based key
|
||||
key = storage.get_object_key(remote_name, path)
|
||||
# Check if using URL-based key or path-based key
|
||||
if url.startswith("http"):
|
||||
key = storage.get_object_key(url)
|
||||
else:
|
||||
key = storage.get_object_key_from_path(remote_name, path)
|
||||
|
||||
if storage.exists(key):
|
||||
logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})")
|
||||
return {
|
||||
"url": url,
|
||||
"cached_url": storage.get_url(key),
|
||||
@@ -206,198 +112,32 @@ async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
|
||||
}
|
||||
|
||||
try:
|
||||
remote_config = config.get_remote_config(remote_name) or {}
|
||||
is_docker = remote_config.get("package") == "docker" or "/v2/" in url
|
||||
|
||||
# Prepare headers
|
||||
headers = {}
|
||||
username = remote_config.get("username")
|
||||
password = remote_config.get("password")
|
||||
|
||||
if is_docker:
|
||||
if "/manifests/" in url:
|
||||
headers["Accept"] = (
|
||||
"application/vnd.docker.distribution.manifest.v2+json,"
|
||||
"application/vnd.oci.image.manifest.v1+json,"
|
||||
"application/vnd.oci.image.index.v1+json,"
|
||||
"application/vnd.docker.distribution.manifest.list.v2+json"
|
||||
)
|
||||
elif "/blobs/" in url:
|
||||
headers["Accept"] = "application/octet-stream"
|
||||
elif username and password:
|
||||
headers["Authorization"] = "Basic " + base64.b64encode(f"{username}:{password}".encode()).decode()
|
||||
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.get(url, headers=headers)
|
||||
|
||||
# Handle Docker Bearer token challenge
|
||||
if response.status_code == 401 and is_docker:
|
||||
www_auth = response.headers.get("WWW-Authenticate", "")
|
||||
username = remote_config.get("username")
|
||||
password = remote_config.get("password")
|
||||
token = await get_docker_token_for_response(www_auth, username, password)
|
||||
if token:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
response = await client.get(url, headers=headers)
|
||||
|
||||
response = await client.get(url)
|
||||
response.raise_for_status()
|
||||
|
||||
storage_path = storage.upload(key, response.content)
|
||||
|
||||
logger.info(f"Cache ADD SUCCESS: {url} (size: {len(response.content)} bytes, key: {key})")
|
||||
|
||||
return {
|
||||
"url": url,
|
||||
"cached_url": storage.get_url(key),
|
||||
"storage_path": storage_path,
|
||||
"size": len(response.content),
|
||||
"status": "cached",
|
||||
"etag": response.headers.get("ETag"),
|
||||
"last_modified": response.headers.get("Last-Modified"),
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {"url": url, "status": "error", "error": str(e)}
|
||||
|
||||
|
||||
def _basic_auth_header(remote_cfg: dict) -> dict[str, str]:
    """Build an HTTP Basic ``Authorization`` header from a remote's config.

    Returns an empty dict unless both ``username`` and ``password`` are set
    to non-empty values.
    """
    user = remote_cfg.get("username")
    secret = remote_cfg.get("password")
    if not (user and secret):
        return {}
    encoded = base64.b64encode(f"{user}:{secret}".encode()).decode()
    return {"Authorization": f"Basic {encoded}"}
|
||||
|
||||
|
||||
async def _upstream_reachable(url: str, auth_headers: dict | None = None) -> bool:
    """Probe *url* with a HEAD request using a 10 second timeout.

    Returns ``False`` only when the request fails with ``httpx.NetworkError``
    or ``httpx.TimeoutException``. A completed request, whatever its status
    code, counts as reachable, and so does any other exception.
    """
    probe_headers = auth_headers or {}
    try:
        async with httpx.AsyncClient(follow_redirects=True) as http:
            await http.head(url, headers=probe_headers, timeout=10.0)
    except (httpx.NetworkError, httpx.TimeoutException):
        return False
    except Exception:
        # Anything that is not a network/timeout failure is not taken as
        # evidence that the backend is down.
        return True
    return True
|
||||
|
||||
|
||||
async def check_upstream_changed(remote_url: str, remote_name: str, path: str, auth_headers: dict | None = None) -> bool:
    """Ask upstream, via a conditional HEAD, whether a cached file has changed.

    Returns ``True`` when no validators (ETag / Last-Modified) are stored for
    the file, or when upstream answers with anything other than 304.
    Returns ``False`` only on a definitive 304.

    Raises:
        UpstreamUnreachable: if the request fails with a network or timeout error.
    """
    meta = cache.get_mutable_meta(remote_name, path)
    if not meta:
        return True

    etag = meta.get("etag")
    last_modified = meta.get("last_modified")
    if not (etag or last_modified):
        # Nothing to validate against, so assume the file changed.
        return True

    conditional_headers = dict(auth_headers or {})
    if etag:
        conditional_headers["If-None-Match"] = etag
    if last_modified:
        conditional_headers["If-Modified-Since"] = last_modified

    try:
        async with httpx.AsyncClient(follow_redirects=True) as http:
            reply = await http.head(remote_url, headers=conditional_headers)
    except (httpx.NetworkError, httpx.TimeoutException) as exc:
        raise UpstreamUnreachable(str(exc)) from exc
    return reply.status_code != 304
|
||||
|
||||
|
||||
async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) -> bool:
    """Decide what to do with a mutable file whose cache TTL has run out.

    Returns ``True`` when the cached copy should keep being served (its TTL is
    renewed), or ``False`` after the cached copy has been cleaned up so the
    caller re-downloads it.
    """
    mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
    remote_cfg = config.get_remote_config(remote_name) or {}
    auth = _basic_auth_header(remote_cfg)

    def keep_cached_copy(log, message: str) -> bool:
        # Renew the TTL marker and report that the cached copy stays valid.
        cache.mark_index_cached(remote_name, path, mutable_ttl)
        log(message)
        return True

    stale_message = f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)"

    # Conditional revalidation applies only when the remote opts in and the
    # path matches one of its user-defined mutable patterns.
    revalidate = remote_cfg.get("check_mutable_updates", False) and cache.is_mutable_file(
        path, config.get_user_mutable_patterns(remote_name)
    )

    if not revalidate:
        if not await _upstream_reachable(remote_url, auth):
            return keep_cached_copy(logger.warning, stale_message)
        logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")
    else:
        try:
            changed = await check_upstream_changed(remote_url, remote_name, path, auth)
        except UpstreamUnreachable:
            return keep_cached_copy(logger.warning, stale_message)
        if not changed:
            return keep_cached_copy(
                logger.info,
                f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)",
            )
        logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")

    cache.cleanup_expired_index(storage, remote_name, path)
    return False
|
||||
|
||||
|
||||
# Filename suffixes mapped to the media type served for them. Order matters
# only in that the first matching row wins.
_CONTENT_TYPES_BY_SUFFIX: tuple[tuple[tuple[str, ...], str], ...] = (
    ((".tar.gz", ".tgz", ".xml.gz"), "application/gzip"),
    ((".xml.bz2",), "application/x-bzip2"),
    ((".xml.xz",), "application/x-xz"),
    ((".zip", ".whl"), "application/zip"),
    ((".exe",), "application/x-msdownload"),
    ((".rpm",), "application/x-rpm"),
    ((".xml",), "application/xml"),
    ((".yaml", ".yml"), "text/yaml"),
)


def _get_content_type(filename: str) -> str:
    """Return the media type to serve for *filename*, based on its suffix.

    Compressed XML is reported with the type of its compression format, so
    ``.xml.bz2`` and ``.xml.xz`` are not labelled as gzip. Unknown suffixes
    fall back to ``application/octet-stream``.
    """
    for suffixes, content_type in _CONTENT_TYPES_BY_SUFFIX:
        if filename.endswith(suffixes):
            return content_type
    return "application/octet-stream"
|
||||
|
||||
|
||||
def _rewrite_upstream_urls(data: bytes, upstream_url: str | None, proxy_url: str) -> bytes:
    """Replace every occurrence of *upstream_url* (minus trailing ``/``) in *data* with *proxy_url*."""
    needle = (upstream_url or "").rstrip("/").encode()
    if not needle:
        # bytes.replace() with an empty pattern inserts the replacement between
        # every byte, which would corrupt the payload; leave it untouched.
        return data
    return data.replace(needle, proxy_url.encode())


def _resolve_content(
    data: bytes,
    path: str,
    filename: str,
    remote_config: dict,
    request: Request,
    remote_name: str = "",
) -> tuple[bytes, str]:
    """Return (possibly-rewritten data, content_type) for a cached artifact.

    Index-like documents have their upstream download URLs rewritten to point
    back at this proxy (``<request base URL>/api/v1/remote/<remote>``):

    * pypi, when the path contains ``simple/``: rewritten against
      ``pypi_files_url`` / ``pypi_files_remote`` and served as HTML.
    * npm, for any path not ending in ``.tgz``: rewritten against
      ``npm_files_url`` / ``npm_files_remote`` and served as JSON.
    * helm ``index.yaml``: the remote's ``base_url`` is rewritten to
      *remote_name* and served as YAML.

    Everything else is returned unchanged with a suffix-derived content type.
    An empty or missing upstream URL skips the rewrite.
    """
    package = remote_config.get("package")

    def proxied(target_remote: str) -> str:
        # Evaluated lazily so the request is only touched when a rewrite applies.
        proxy_base = str(request.base_url).rstrip("/")
        return f"{proxy_base}/api/v1/remote/{target_remote}"

    if package == "pypi" and "simple/" in path:
        files_url = remote_config.get("pypi_files_url", "https://files.pythonhosted.org")
        files_remote = remote_config.get("pypi_files_remote", "pypi-files")
        return _rewrite_upstream_urls(data, files_url, proxied(files_remote)), "text/html; charset=utf-8"

    if package == "npm" and not path.endswith(".tgz"):
        files_url = remote_config.get("npm_files_url", "https://registry.npmjs.org")
        files_remote = remote_config.get("npm_files_remote", "npm-files")
        return _rewrite_upstream_urls(data, files_url, proxied(files_remote)), "application/json"

    if package == "helm" and filename == "index.yaml":
        base_url = remote_config.get("base_url", "")
        return _rewrite_upstream_urls(data, base_url, proxied(remote_name)), "text/yaml"

    return data, _get_content_type(filename)
|
||||
|
||||
|
||||
@app.get("/api/v1/remote/{remote_name}/{path:path}")
|
||||
async def get_artifact(request: Request, remote_name: str, path: str):
|
||||
async def get_artifact(remote_name: str, path: str):
|
||||
# Check if remote is configured
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||
)
|
||||
|
||||
# Check if this is a local repository
|
||||
if remote_config.get("type") == "local":
|
||||
@@ -417,7 +157,9 @@ async def get_artifact(request: Request, remote_name: str, path: str):
|
||||
return Response(
|
||||
content=content,
|
||||
media_type=content_type,
|
||||
headers={"Content-Disposition": f"attachment; filename={os.path.basename(path)}"},
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename={os.path.basename(path)}"
|
||||
},
|
||||
)
|
||||
|
||||
# Extract repository path for pattern checking
|
||||
@@ -431,37 +173,62 @@ async def get_artifact(request: Request, remote_name: str, path: str):
|
||||
|
||||
# Check if artifact matches configured patterns
|
||||
if not await check_artifact_patterns(remote_name, repo_path, file_path, path):
|
||||
logger.info(f"PATTERN BLOCKED: {remote_name}/{path} - not matching include patterns")
|
||||
raise HTTPException(status_code=403, detail="Artifact not allowed by configuration patterns")
|
||||
raise HTTPException(
|
||||
status_code=403, detail="Artifact not allowed by configuration patterns"
|
||||
)
|
||||
|
||||
# Construct the remote URL
|
||||
remote_url = await construct_remote_url(remote_name, path)
|
||||
|
||||
# Check if artifact is already cached
|
||||
cached_key = storage.get_object_key(remote_name, path)
|
||||
if not storage.exists(cached_key):
|
||||
cached_key = None
|
||||
# Check if artifact is already cached (try both URL and path-based keys)
|
||||
url_key = storage.get_object_key(remote_url)
|
||||
path_key = storage.get_object_key_from_path(remote_name, path)
|
||||
|
||||
# For mutable files, check Redis TTL validity
|
||||
cached_key = None
|
||||
if storage.exists(url_key):
|
||||
cached_key = url_key
|
||||
elif storage.exists(path_key):
|
||||
cached_key = path_key
|
||||
|
||||
# For index files, check Redis TTL validity
|
||||
filename = os.path.basename(path)
|
||||
is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name))
|
||||
is_index = cache.is_index_file(filename)
|
||||
|
||||
if cached_key and is_mutable:
|
||||
if cached_key and is_index:
|
||||
# Index file exists, but check if it's still valid
|
||||
if not cache.is_index_valid(remote_name, path):
|
||||
if not await handle_expired_mutable(remote_name, path, remote_url):
|
||||
cached_key = None
|
||||
# Index has expired, remove it from S3
|
||||
cache.cleanup_expired_index(storage, remote_name, path)
|
||||
cached_key = None # Force re-download
|
||||
|
||||
if cached_key:
|
||||
# Return cached artifact
|
||||
try:
|
||||
artifact_data = storage.download_object(cached_key)
|
||||
filename = os.path.basename(path)
|
||||
artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
|
||||
|
||||
logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})")
|
||||
# Determine content type based on file extension
|
||||
content_type = "application/octet-stream"
|
||||
if filename.endswith(".tar.gz"):
|
||||
content_type = "application/gzip"
|
||||
elif filename.endswith(".zip"):
|
||||
content_type = "application/zip"
|
||||
elif filename.endswith(".exe"):
|
||||
content_type = "application/x-msdownload"
|
||||
elif filename.endswith(".rpm"):
|
||||
content_type = "application/x-rpm"
|
||||
elif filename.endswith(".xml"):
|
||||
content_type = "application/xml"
|
||||
elif filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
||||
content_type = "application/gzip"
|
||||
|
||||
# Record cache hit metrics
|
||||
metrics.record_cache_hit(remote_name, len(artifact_data))
|
||||
database.record_artifact_mapping(cached_key, remote_name, path, len(artifact_data))
|
||||
|
||||
# Record artifact mapping in database if not already recorded
|
||||
database.record_artifact_mapping(
|
||||
cached_key, remote_name, path, len(artifact_data)
|
||||
)
|
||||
|
||||
return Response(
|
||||
content=artifact_data,
|
||||
@@ -473,35 +240,53 @@ async def get_artifact(request: Request, remote_name: str, path: str):
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Error retrieving cached artifact: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Error retrieving cached artifact: {str(e)}"
|
||||
)
|
||||
|
||||
# Artifact not cached, cache it first
|
||||
logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
|
||||
result = await cache_single_artifact(remote_url, remote_name, path)
|
||||
|
||||
if result["status"] == "error":
|
||||
logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}")
|
||||
raise HTTPException(status_code=502, detail=f"Failed to fetch artifact: {result['error']}")
|
||||
raise HTTPException(
|
||||
status_code=502, detail=f"Failed to fetch artifact: {result['error']}"
|
||||
)
|
||||
|
||||
# Mark mutable files as cached in Redis with TTL
|
||||
if result["status"] == "cached" and is_mutable:
|
||||
# Mark index files as cached in Redis if this was a new download
|
||||
if result["status"] == "cached" and is_index:
|
||||
# Get TTL from remote config
|
||||
cache_config = config.get_cache_config(remote_name)
|
||||
mutable_ttl = cache_config.get("mutable_ttl", 3600)
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
|
||||
if result.get("etag") or result.get("last_modified"):
|
||||
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
|
||||
index_ttl = cache_config.get("index_ttl", 300) # Default 5 minutes
|
||||
cache.mark_index_cached(remote_name, path, index_ttl)
|
||||
|
||||
# Now return the cached artifact
|
||||
try:
|
||||
cache_key = storage.get_object_key(remote_name, path)
|
||||
cache_key = storage.get_object_key(remote_url)
|
||||
artifact_data = storage.download_object(cache_key)
|
||||
filename = os.path.basename(path)
|
||||
artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
|
||||
|
||||
content_type = "application/octet-stream"
|
||||
if filename.endswith(".tar.gz"):
|
||||
content_type = "application/gzip"
|
||||
elif filename.endswith(".zip"):
|
||||
content_type = "application/zip"
|
||||
elif filename.endswith(".exe"):
|
||||
content_type = "application/x-msdownload"
|
||||
elif filename.endswith(".rpm"):
|
||||
content_type = "application/x-rpm"
|
||||
elif filename.endswith(".xml"):
|
||||
content_type = "application/xml"
|
||||
elif filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
||||
content_type = "application/gzip"
|
||||
|
||||
# Record cache miss metrics
|
||||
metrics.record_cache_miss(remote_name, len(artifact_data))
|
||||
cache_key = storage.get_object_key(remote_name, path)
|
||||
database.record_artifact_mapping(cache_key, remote_name, path, len(artifact_data))
|
||||
|
||||
# Record artifact mapping in database
|
||||
cache_key = storage.get_object_key(remote_url)
|
||||
database.record_artifact_mapping(
|
||||
cache_key, remote_name, path, len(artifact_data)
|
||||
)
|
||||
|
||||
return Response(
|
||||
content=artifact_data,
|
||||
@@ -516,89 +301,6 @@ async def get_artifact(request: Request, remote_name: str, path: str):
|
||||
raise HTTPException(status_code=500, detail=f"Error serving artifact: {str(e)}")
|
||||
|
||||
|
||||
@app.get("/v2/")
async def docker_v2_ping():
    """Answer the registry API base check with an empty JSON object and the v2 API version header."""
    api_version_header = {"Docker-Distribution-Api-Version": "registry/2.0"}
    return Response(content="{}", media_type="application/json", headers=api_version_header)
|
||||
|
||||
|
||||
@app.api_route("/v2/{remote_name}/{path:path}", methods=["GET", "HEAD"])
async def docker_v2_proxy(request: Request, remote_name: str, path: str):
    """Serve a Docker registry v2 manifest or blob through the cache.

    The object is served from storage when present (and, for mutable paths,
    still valid); otherwise it is fetched from upstream and cached first.
    GET requests are recorded as a cache hit or a cache miss accordingly;
    HEAD requests return headers only and are not recorded.

    Raises:
        HTTPException: 404 for an unknown remote, 400 for a non-docker remote,
            403 when the path is rejected by the configured patterns, and 502
            when the upstream fetch fails.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
    if remote_config.get("package") != "docker":
        raise HTTPException(status_code=400, detail=f"Remote '{remote_name}' is not a docker remote")

    # Check immutable_patterns against the image name (e.g. "library/nginx")
    patterns = config.get_immutable_patterns(remote_name, "")
    if patterns:
        path_parts = path.split("/")
        image_name = "/".join(path_parts[:2]) if len(path_parts) >= 2 else path
        if not any(re.search(p, path) or re.search(p, image_name) for p in patterns):
            logger.info(f"PATTERN BLOCKED: {remote_name}/{path}")
            raise HTTPException(status_code=403, detail="Image not allowed by configuration patterns")

    remote_url = await construct_remote_url(remote_name, path)

    object_key = storage.get_object_key(remote_name, path)
    served_from_cache = bool(storage.exists(object_key))

    is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name))

    if served_from_cache and is_mutable and not cache.is_index_valid(remote_name, path):
        # handle_expired_mutable returns True when the cached copy is still good.
        served_from_cache = bool(await handle_expired_mutable(remote_name, path, remote_url))

    if not served_from_cache:
        logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
        result = await cache_single_artifact(remote_url, remote_name, path)
        if result["status"] == "error":
            raise HTTPException(status_code=502, detail=f"Failed to fetch: {result['error']}")
        if result["status"] == "cached" and is_mutable:
            cache_config = config.get_cache_config(remote_name)
            mutable_ttl = cache_config.get("mutable_ttl", 3600)
            cache.mark_index_cached(remote_name, path, mutable_ttl)
            logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
            if result.get("etag") or result.get("last_modified"):
                cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))

    artifact_data = storage.download_object(object_key)

    if "/blobs/" in path:
        content_type = "application/octet-stream"
    else:
        # Manifests: prefer the declared mediaType, otherwise guess between an
        # image index (has "manifests") and a single image manifest.
        try:
            manifest_json = json.loads(artifact_data)
            content_type = manifest_json.get("mediaType")
            if not content_type:
                if "manifests" in manifest_json:
                    content_type = "application/vnd.oci.image.index.v1+json"
                else:
                    content_type = "application/vnd.oci.image.manifest.v1+json"
        except Exception:
            content_type = "application/vnd.oci.image.manifest.v1+json"

    digest = f"sha256:{hashlib.sha256(artifact_data).hexdigest()}"
    headers = {
        "Docker-Distribution-Api-Version": "registry/2.0",
        "Docker-Content-Digest": digest,
        "Content-Length": str(len(artifact_data)),
    }

    if request.method == "HEAD":
        return Response(status_code=200, headers=headers, media_type=content_type)

    # Record what actually happened: a freshly fetched object is a miss.
    if served_from_cache:
        metrics.record_cache_hit(remote_name, len(artifact_data))
    else:
        metrics.record_cache_miss(remote_name, len(artifact_data))
    return Response(content=artifact_data, media_type=content_type, headers=headers)
|
||||
|
||||
|
||||
async def discover_artifacts(remote: str, include_pattern: str) -> list[str]:
|
||||
if "github.com" in remote:
|
||||
return await discover_github_releases(remote, include_pattern)
|
||||
@@ -614,7 +316,9 @@ async def discover_github_releases(remote: str, include_pattern: str) -> list[st
|
||||
owner, repo = match.groups()
|
||||
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.get(f"https://api.github.com/repos/{owner}/{repo}/releases")
|
||||
response = await client.get(
|
||||
f"https://api.github.com/repos/{owner}/{repo}/releases"
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
raise HTTPException(
|
||||
@@ -643,10 +347,14 @@ async def upload_file(remote_name: str, path: str, file: UploadFile = File(...))
|
||||
# Check if remote is configured and is local
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||
)
|
||||
|
||||
if remote_config.get("type") != "local":
|
||||
raise HTTPException(status_code=400, detail="Upload only supported for local repositories")
|
||||
raise HTTPException(
|
||||
status_code=400, detail="Upload only supported for local repositories"
|
||||
)
|
||||
|
||||
try:
|
||||
# Read file content
|
||||
@@ -708,7 +416,9 @@ def check_file_exists(remote_name: str, path: str):
|
||||
# Check if remote is configured
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||
)
|
||||
|
||||
# Handle local repository
|
||||
if remote_config.get("type") == "local":
|
||||
@@ -720,10 +430,16 @@ def check_file_exists(remote_name: str, path: str):
|
||||
return Response(
|
||||
headers={
|
||||
"Content-Length": str(metadata["size_bytes"]),
|
||||
"Content-Type": metadata.get("content_type", "application/octet-stream"),
|
||||
"Content-Type": metadata.get(
|
||||
"content_type", "application/octet-stream"
|
||||
),
|
||||
"X-SHA256": metadata["sha256_sum"],
|
||||
"X-Created-At": metadata["created_at"].isoformat() if metadata["created_at"] else "",
|
||||
"X-Uploaded-At": metadata["uploaded_at"].isoformat() if metadata["uploaded_at"] else "",
|
||||
"X-Created-At": metadata["created_at"].isoformat()
|
||||
if metadata["created_at"]
|
||||
else "",
|
||||
"X-Uploaded-At": metadata["uploaded_at"].isoformat()
|
||||
if metadata["uploaded_at"]
|
||||
else "",
|
||||
}
|
||||
)
|
||||
except HTTPException:
|
||||
@@ -732,7 +448,9 @@ def check_file_exists(remote_name: str, path: str):
|
||||
raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")
|
||||
else:
|
||||
# For remote repositories, just return 405 Method Not Allowed
|
||||
raise HTTPException(status_code=405, detail="HEAD method only supported for local repositories")
|
||||
raise HTTPException(
|
||||
status_code=405, detail="HEAD method only supported for local repositories"
|
||||
)
|
||||
|
||||
|
||||
@app.delete("/api/v1/remote/{remote_name}/{path:path}")
|
||||
@@ -741,10 +459,14 @@ def delete_file(remote_name: str, path: str):
|
||||
# Check if remote is configured and is local
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||
)
|
||||
|
||||
if remote_config.get("type") != "local":
|
||||
raise HTTPException(status_code=400, detail="Delete only supported for local repositories")
|
||||
raise HTTPException(
|
||||
status_code=400, detail="Delete only supported for local repositories"
|
||||
)
|
||||
|
||||
try:
|
||||
# Get S3 key before deleting from database
|
||||
@@ -765,9 +487,11 @@ def delete_file(remote_name: str, path: str):
|
||||
|
||||
|
||||
@app.post("/api/v1/artifacts/cache")
|
||||
async def cache_artifact(request: ArtifactRequest) -> dict[str, Any]:
|
||||
async def cache_artifact(request: ArtifactRequest) -> Dict[str, Any]:
|
||||
try:
|
||||
matching_urls = await discover_artifacts(request.remote, request.include_pattern)
|
||||
matching_urls = await discover_artifacts(
|
||||
request.remote, request.include_pattern
|
||||
)
|
||||
|
||||
if not matching_urls:
|
||||
return {
|
||||
@@ -782,7 +506,11 @@ async def cache_artifact(request: ArtifactRequest) -> dict[str, Any]:
|
||||
result = await cache_single_artifact(url, "", "")
|
||||
cached_artifacts.append(result)
|
||||
|
||||
cached_count = sum(1 for artifact in cached_artifacts if artifact["status"] in ["cached", "already_cached"])
|
||||
cached_count = sum(
|
||||
1
|
||||
for artifact in cached_artifacts
|
||||
if artifact["status"] in ["cached", "already_cached"]
|
||||
)
|
||||
|
||||
return {
|
||||
"message": f"Processed {len(matching_urls)} artifacts, {cached_count} successfully cached",
|
||||
@@ -795,20 +523,19 @@ async def cache_artifact(request: ArtifactRequest) -> dict[str, Any]:
|
||||
|
||||
|
||||
@app.get("/api/v1/artifacts/{remote:path}")
|
||||
async def list_cached_artifacts(remote: str, include_pattern: str = ".*") -> dict[str, Any]:
|
||||
async def list_cached_artifacts(
|
||||
remote: str, include_pattern: str = ".*"
|
||||
) -> Dict[str, Any]:
|
||||
try:
|
||||
matching_urls = await discover_artifacts(remote, include_pattern)
|
||||
|
||||
cached_artifacts = []
|
||||
for url in matching_urls:
|
||||
# Extract path from URL for hierarchical key generation
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed = urlparse(url)
|
||||
path = parsed.path
|
||||
key = storage.get_object_key(remote, path)
|
||||
key = storage.get_object_key(url)
|
||||
if storage.exists(key):
|
||||
cached_artifacts.append({"url": url, "cached_url": storage.get_url(key), "key": key})
|
||||
cached_artifacts.append(
|
||||
{"url": url, "cached_url": storage.get_url(key), "key": key}
|
||||
)
|
||||
|
||||
return {
|
||||
"remote": remote,
|
||||
@@ -824,7 +551,9 @@ async def list_cached_artifacts(remote: str, include_pattern: str = ".*") -> dic
|
||||
|
||||
@app.get("/metrics")
|
||||
def get_metrics(
|
||||
json: bool | None = Query(False, description="Return JSON format instead of Prometheus"),
|
||||
json: Optional[bool] = Query(
|
||||
False, description="Return JSON format instead of Prometheus"
|
||||
),
|
||||
):
|
||||
"""Get comprehensive metrics about the artifact storage system"""
|
||||
config._check_reload()
|
||||
|
||||
+51
-18
@@ -1,14 +1,22 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from typing import Dict, Any
|
||||
from prometheus_client import Counter, Gauge
|
||||
|
||||
|
||||
# Prometheus metrics
|
||||
request_counter = Counter("artifact_requests_total", "Total artifact requests", ["remote", "status"])
|
||||
request_counter = Counter(
|
||||
"artifact_requests_total", "Total artifact requests", ["remote", "status"]
|
||||
)
|
||||
cache_hit_counter = Counter("artifact_cache_hits_total", "Total cache hits", ["remote"])
|
||||
cache_miss_counter = Counter("artifact_cache_misses_total", "Total cache misses", ["remote"])
|
||||
bandwidth_saved_counter = Counter("artifact_bandwidth_saved_bytes_total", "Total bandwidth saved", ["remote"])
|
||||
storage_size_gauge = Gauge("artifact_storage_size_bytes", "Storage size by remote", ["remote"])
|
||||
cache_miss_counter = Counter(
|
||||
"artifact_cache_misses_total", "Total cache misses", ["remote"]
|
||||
)
|
||||
bandwidth_saved_counter = Counter(
|
||||
"artifact_bandwidth_saved_bytes_total", "Total bandwidth saved", ["remote"]
|
||||
)
|
||||
storage_size_gauge = Gauge(
|
||||
"artifact_storage_size_bytes", "Storage size by remote", ["remote"]
|
||||
)
|
||||
redis_keys_gauge = Gauge("artifact_redis_keys_total", "Total Redis keys")
|
||||
|
||||
|
||||
@@ -36,7 +44,9 @@ class MetricsManager:
|
||||
# Increment per-remote counters
|
||||
self.redis_client.client.incr(f"metrics:cache_hits:{remote_name}")
|
||||
self.redis_client.client.incr(f"metrics:total_requests:{remote_name}")
|
||||
self.redis_client.client.incrby(f"metrics:bandwidth_saved:{remote_name}", size_bytes)
|
||||
self.redis_client.client.incrby(
|
||||
f"metrics:bandwidth_saved:{remote_name}", size_bytes
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -81,7 +91,7 @@ class MetricsManager:
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
def get_s3_size_by_remote(self, storage, config_manager) -> dict[str, int]:
|
||||
def get_s3_size_by_remote(self, storage, config_manager) -> Dict[str, int]:
|
||||
"""Get size of stored data per remote using database mappings"""
|
||||
if self.database_manager and self.database_manager.available:
|
||||
# Get from database if available
|
||||
@@ -136,7 +146,7 @@ class MetricsManager:
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
def get_metrics(self, storage, config_manager) -> dict[str, Any]:
|
||||
def get_metrics(self, storage, config_manager) -> Dict[str, Any]:
|
||||
"""Get comprehensive metrics"""
|
||||
# Update Redis keys gauge
|
||||
redis_key_count = self.get_redis_key_count()
|
||||
@@ -163,31 +173,54 @@ class MetricsManager:
|
||||
if self.redis_client and self.redis_client.available:
|
||||
try:
|
||||
# Get global metrics
|
||||
cache_hits = int(self.redis_client.client.get("metrics:cache_hits") or 0)
|
||||
cache_misses = int(self.redis_client.client.get("metrics:cache_misses") or 0)
|
||||
cache_hits = int(
|
||||
self.redis_client.client.get("metrics:cache_hits") or 0
|
||||
)
|
||||
cache_misses = int(
|
||||
self.redis_client.client.get("metrics:cache_misses") or 0
|
||||
)
|
||||
total_requests = cache_hits + cache_misses
|
||||
bandwidth_saved = int(self.redis_client.client.get("metrics:bandwidth_saved") or 0)
|
||||
bandwidth_saved = int(
|
||||
self.redis_client.client.get("metrics:bandwidth_saved") or 0
|
||||
)
|
||||
|
||||
metrics["requests"]["cache_hits"] = cache_hits
|
||||
metrics["requests"]["cache_misses"] = cache_misses
|
||||
metrics["requests"]["total_requests"] = total_requests
|
||||
metrics["requests"]["cache_hit_ratio"] = cache_hits / total_requests if total_requests > 0 else 0.0
|
||||
metrics["requests"]["cache_hit_ratio"] = (
|
||||
cache_hits / total_requests if total_requests > 0 else 0.0
|
||||
)
|
||||
metrics["bandwidth"]["saved_bytes"] = bandwidth_saved
|
||||
|
||||
# Get per-remote metrics
|
||||
for remote in config_manager.config.get("remotes", {}).keys():
|
||||
remote_cache_hits = int(self.redis_client.client.get(f"metrics:cache_hits:{remote}") or 0)
|
||||
remote_cache_misses = int(self.redis_client.client.get(f"metrics:cache_misses:{remote}") or 0)
|
||||
remote_cache_hits = int(
|
||||
self.redis_client.client.get(f"metrics:cache_hits:{remote}")
|
||||
or 0
|
||||
)
|
||||
remote_cache_misses = int(
|
||||
self.redis_client.client.get(f"metrics:cache_misses:{remote}")
|
||||
or 0
|
||||
)
|
||||
remote_total = remote_cache_hits + remote_cache_misses
|
||||
remote_bandwidth_saved = int(self.redis_client.client.get(f"metrics:bandwidth_saved:{remote}") or 0)
|
||||
remote_bandwidth_saved = int(
|
||||
self.redis_client.client.get(
|
||||
f"metrics:bandwidth_saved:{remote}"
|
||||
)
|
||||
or 0
|
||||
)
|
||||
|
||||
metrics["per_remote"][remote] = {
|
||||
"cache_hits": remote_cache_hits,
|
||||
"cache_misses": remote_cache_misses,
|
||||
"total_requests": remote_total,
|
||||
"cache_hit_ratio": remote_cache_hits / remote_total if remote_total > 0 else 0.0,
|
||||
"cache_hit_ratio": remote_cache_hits / remote_total
|
||||
if remote_total > 0
|
||||
else 0.0,
|
||||
"bandwidth_saved_bytes": remote_bandwidth_saved,
|
||||
"storage_size_bytes": metrics["storage"]["size_by_remote"].get(remote, 0),
|
||||
"storage_size_bytes": metrics["storage"]["size_by_remote"].get(
|
||||
remote, 0
|
||||
),
|
||||
}
|
||||
|
||||
except Exception:
|
||||
|
||||
+19
-28
@@ -1,6 +1,6 @@
|
||||
import hashlib
|
||||
import os
|
||||
|
||||
import hashlib
|
||||
from urllib.parse import urlparse
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError
|
||||
@@ -22,22 +22,23 @@ class S3Storage:
|
||||
self.bucket = bucket
|
||||
self.secure = secure
|
||||
|
||||
ca_bundle = os.environ.get("REQUESTS_CA_BUNDLE") or os.environ.get("SSL_CERT_FILE")
|
||||
config_kwargs = {"request_checksum_calculation": "when_required", "response_checksum_validation": "when_required"}
|
||||
ca_bundle = os.environ.get('REQUESTS_CA_BUNDLE') or os.environ.get('SSL_CERT_FILE')
|
||||
config_kwargs = {
|
||||
"request_checksum_calculation": "when_required",
|
||||
"response_checksum_validation": "when_required"
|
||||
}
|
||||
client_kwargs = {
|
||||
"endpoint_url": f"http{'s' if self.secure else ''}://{self.endpoint}",
|
||||
"aws_access_key_id": self.access_key,
|
||||
"aws_secret_access_key": self.secret_key,
|
||||
"config": Config(**config_kwargs),
|
||||
"config": Config(**config_kwargs)
|
||||
}
|
||||
|
||||
if ca_bundle and os.path.exists(ca_bundle):
|
||||
client_kwargs["verify"] = ca_bundle
|
||||
print(f"Debug: Using CA bundle: {ca_bundle}")
|
||||
else:
|
||||
print(
|
||||
f"Debug: No CA bundle found. REQUESTS_CA_BUNDLE={os.environ.get('REQUESTS_CA_BUNDLE')}, SSL_CERT_FILE={os.environ.get('SSL_CERT_FILE')}"
|
||||
)
|
||||
print(f"Debug: No CA bundle found. REQUESTS_CA_BUNDLE={os.environ.get('REQUESTS_CA_BUNDLE')}, SSL_CERT_FILE={os.environ.get('SSL_CERT_FILE')}")
|
||||
|
||||
self.client = boto3.client("s3", **client_kwargs)
|
||||
|
||||
@@ -54,27 +55,17 @@ class S3Storage:
|
||||
except ClientError:
|
||||
self.client.create_bucket(Bucket=self.bucket)
|
||||
|
||||
def get_object_key(self, remote_name: str, path: str) -> str:
|
||||
# Extract directory path and filename
|
||||
clean_path = path.lstrip("/")
|
||||
filename = os.path.basename(clean_path)
|
||||
directory_path = os.path.dirname(clean_path)
|
||||
def get_object_key(self, url: str) -> str:
    """Build the storage key for an artifact from its source URL.

    The key has the form ``<netloc>/<hash>/<filename>``:

    * ``<netloc>`` is the network location of ``url``;
    * ``<hash>`` is the first 16 hex characters of the SHA-256 digest of
      the complete URL string;
    * ``<filename>`` is the last component of the URL path (empty when
      the path ends with ``/``).

    Args:
        url: Absolute URL of the upstream artifact.

    Returns:
        The object key as a ``/``-separated string.
    """
    parsed = urlparse(url)
    # The digest covers the whole URL, not just the path component.
    url_hash = hashlib.sha256(url.encode()).hexdigest()[:16]
    filename = os.path.basename(parsed.path)
    return f"{parsed.netloc}/{url_hash}/{filename}"
|
||||
|
||||
# Special handling for Docker registry blobs (use digest as key for deduplication)
|
||||
if "/blobs/sha256:" in clean_path:
|
||||
# Extract the SHA256 digest for Docker blobs
|
||||
parts = clean_path.split("/blobs/sha256:")
|
||||
if len(parts) == 2:
|
||||
digest = parts[1]
|
||||
return f"{remote_name}/blobs/sha256/{digest}"
|
||||
|
||||
# Hash the directory path to keep keys manageable while preserving remote structure
|
||||
if directory_path:
|
||||
path_hash = hashlib.sha256(directory_path.encode()).hexdigest()[:16]
|
||||
return f"{remote_name}/{path_hash}/{filename}"
|
||||
else:
|
||||
# If no directory, just use remote and filename
|
||||
return f"{remote_name}/{filename}"
|
||||
def get_object_key_from_path(self, remote_name: str, path: str) -> str:
    """Build the storage key for an artifact addressed by its API path.

    The key has the form ``<remote_name>/<hash>/<filename>`` where
    ``<hash>`` is the first 16 hex characters of the SHA-256 digest of
    ``path`` and ``<filename>`` is the final component of ``path``.

    Args:
        remote_name: Name of the remote the artifact belongs to.
        path: Path of the artifact as given in the API request.

    Returns:
        The object key as a ``/``-separated string.
    """
    # Create a key based on the API path for direct access
    digest = hashlib.sha256(path.encode()).hexdigest()[:16]
    return f"{remote_name}/{digest}/{os.path.basename(path)}"
|
||||
|
||||
def exists(self, key: str) -> bool:
|
||||
try:
|
||||
|
||||
@@ -1,175 +0,0 @@
|
||||
"""
|
||||
Pytest configuration and shared fixtures.
|
||||
|
||||
Module-level setup (env vars + connection patches) runs before any test
|
||||
module is imported, so the FastAPI app initialises against mocks rather
|
||||
than real S3 / Redis / PostgreSQL services.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import yaml
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test remote configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Remote definitions written to the temporary remotes.yaml used by the tests.
# All regular expressions are raw strings so backslashes read as written.
TEST_REMOTES = {
    "remotes": {
        "alpine-test": {
            "base_url": "https://dl-cdn.alpinelinux.org",
            "type": "remote",
            "package": "alpine",
            "immutable_patterns": [r".*/x86_64/.*\.apk$"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
        },
        "rpm-test": {
            "base_url": "https://example.com/rpm",
            "type": "remote",
            "package": "rpm",
            "immutable_patterns": [r".*/x86_64/.*\.rpm$", r".*/repodata/.*$"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
        },
        "docker-test": {
            "base_url": "https://registry.example.com",
            "type": "remote",
            "package": "docker",
            "cache": {"immutable_ttl": 0, "mutable_ttl": 300},
        },
        # Same registry as docker-test, but with an explicit immutable pattern.
        "docker-restricted": {
            "base_url": "https://registry.example.com",
            "type": "remote",
            "package": "docker",
            "immutable_patterns": [r"^library/nginx"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 300},
        },
        "generic-test": {
            "base_url": "https://releases.example.com",
            "type": "remote",
            "package": "generic",
            "immutable_patterns": [r".*\.tar\.gz$"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 0},
        },
        "custom-index-test": {
            "base_url": "https://example.com",
            "type": "remote",
            "package": "generic",
            "mutable_patterns": [r"metadata\.json$"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 600},
        },
        # Identical to custom-index-test except for check_mutable_updates.
        "check-mutable-test": {
            "base_url": "https://example.com",
            "type": "remote",
            "package": "generic",
            "mutable_patterns": [r"metadata\.json$"],
            "check_mutable_updates": True,
            "cache": {"immutable_ttl": 0, "mutable_ttl": 600},
        },
        # The only remote of type "local"; it has no base_url.
        "local-test": {
            "type": "local",
            "package": "generic",
            "cache": {"immutable_ttl": 0, "mutable_ttl": 0},
        },
        "pypi-test": {
            "base_url": "https://pypi.org",
            "type": "remote",
            "package": "pypi",
            "pypi_files_url": "https://files.pythonhosted.org",
            "pypi_files_remote": "pypi-files-test",
            "cache": {"immutable_ttl": 0, "mutable_ttl": 600},
        },
        # Referenced by pypi-test through "pypi_files_remote".
        "pypi-files-test": {
            "base_url": "https://files.pythonhosted.org",
            "type": "remote",
            "package": "generic",
            "immutable_patterns": [
                r"packages/.*\.whl$",
                r"packages/.*\.whl\.metadata$",
                r"packages/.*\.tar\.gz$",
            ],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 0},
        },
        "npm-test": {
            "base_url": "https://registry.npmjs.org",
            "type": "remote",
            "package": "npm",
            "npm_files_url": "https://registry.npmjs.org",
            "npm_files_remote": "npm-test",
            "immutable_patterns": [r"\.tgz$"],
            # Negative lookahead: every path that does not end in .tgz.
            "mutable_patterns": [r"^(?!.*\.tgz$).*"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 600},
        },
        "helm-test": {
            "base_url": "https://helm.releases.hashicorp.com",
            "type": "remote",
            "package": "helm",
            "immutable_patterns": [r"\.tgz$"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
        },
    }
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Write temp config and set env vars BEFORE importing the package
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import atexit  # noqa: E402
import shutil  # noqa: E402

# mkdtemp() never deletes what it creates, so register the removal ourselves;
# otherwise every test run leaves a directory behind in the system temp area.
_tmpdir = tempfile.mkdtemp()
atexit.register(shutil.rmtree, _tmpdir, ignore_errors=True)

# Serialise the test remotes to a real file; its path is exported below as
# CONFIG_PATH and handed to tests through the config_path fixture.
_config_path = os.path.join(_tmpdir, "remotes.yaml")
with open(_config_path, "w", encoding="utf-8") as _f:
    yaml.dump(TEST_REMOTES, _f)
|
||||
|
||||
# Export the settings before the package is imported. Apart from CONFIG_PATH
# these are placeholder endpoints and credentials for the test session.
os.environ.update(
    {
        # Generated remotes.yaml
        "CONFIG_PATH": _config_path,
        # Object storage
        "MINIO_ENDPOINT": "localhost:9000",
        "MINIO_ACCESS_KEY": "testkey",
        "MINIO_SECRET_KEY": "testsecret",
        "MINIO_BUCKET": "testbucket",
        # Redis
        "REDIS_URL": "redis://localhost:6379/0",
        # Database
        "DBHOST": "localhost",
        "DBPORT": "5432",
        "DBUSER": "test",
        "DBPASS": "test",
        "DBNAME": "test",
    }
)
|
||||
|
||||
# Patch external service connections before the package is imported, so the
# three client factories below hand back MagicMock objects.
# The patches are started here and never stopped: they stay active for the
# whole session (the process exits after the tests finish).
_boto3_patch = patch("boto3.client", return_value=MagicMock())
_redis_patch = patch("redis.from_url", return_value=MagicMock())
_psycopg2_patch = patch("psycopg2.connect", return_value=MagicMock())

_boto3_patch.start()
_redis_patch.start()
_psycopg2_patch.start()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import pytest # noqa: E402
|
||||
from fastapi.testclient import TestClient # noqa: E402
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def app():
    """Return the FastAPI application object, shared for the whole session.

    The import happens inside the fixture, i.e. only when a test first
    requests it, rather than when this conftest module is loaded.
    """
    from artifactapi.main import app as fastapi_app

    return fastapi_app
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def client(app):
    """Return a session-scoped ``TestClient`` wrapping the ``app`` fixture."""
    return TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture
def config_path():
    """Return the path of the temporary ``remotes.yaml`` written at import."""
    return _config_path
|
||||
|
||||
|
||||
@pytest.fixture
def test_remotes():
    """Return the ``TEST_REMOTES`` dict (the same object, not a copy)."""
    return TEST_REMOTES
|
||||
@@ -1,285 +0,0 @@
|
||||
"""Tests for RedisCache, focusing on is_mutable_file with configurable patterns."""
|
||||
|
||||
import hashlib
|
||||
from unittest.mock import ANY, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from artifactapi.cache import RedisCache
|
||||
from artifactapi.config import _PACKAGE_MUTABLE_PATTERNS
|
||||
|
||||
|
||||
@pytest.fixture
def bare_cache():
    """RedisCache instance bypassing __init__ (no Redis needed for pure-logic tests).

    Only methods that do not read attributes set by ``__init__`` are safe to
    call on the returned object.
    """
    return RedisCache.__new__(RedisCache)
|
||||
|
||||
|
||||
@pytest.fixture
def unavailable_cache():
    """RedisCache where Redis is not reachable.

    ``redis.from_url`` is patched to raise while the instance is constructed;
    the patch is lifted again before the fixture value is used.
    """
    with patch("redis.from_url", side_effect=Exception("connection refused")):
        return RedisCache("redis://localhost:6379/0")
|
||||
|
||||
|
||||
@pytest.fixture
def mock_redis_client():
    """Return a fresh ``MagicMock`` standing in for a Redis client."""
    return MagicMock()
|
||||
|
||||
|
||||
@pytest.fixture
def cache_with_redis(mock_redis_client):
    """RedisCache backed by a MagicMock Redis client."""
    with patch("redis.from_url", return_value=mock_redis_client):
        cache = RedisCache("redis://localhost:6379/0")

    # Set both attributes explicitly so the tests do not depend on what
    # __init__ decided about the mocked connection.
    cache.client = mock_redis_client
    cache.available = True
    return cache
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# is_mutable_file — alpine patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIsMutableFileAlpine:
    """``is_mutable_file`` checked against the ``alpine`` package patterns."""

    def test_apkindex_tarball_is_index(self, bare_cache):
        alpine_patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"]
        assert bare_cache.is_mutable_file(
            "alpine/v3.18/x86_64/APKINDEX.tar.gz", alpine_patterns
        )

    def test_nested_apkindex_is_index(self, bare_cache):
        alpine_patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"]
        assert bare_cache.is_mutable_file(
            "mirrors/dl-cdn/alpine/v3.19/community/x86_64/APKINDEX.tar.gz",
            alpine_patterns,
        )

    def test_apk_package_is_not_index(self, bare_cache):
        alpine_patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"]
        assert not bare_cache.is_mutable_file(
            "alpine/v3.18/x86_64/musl-1.2.4-r2.apk", alpine_patterns
        )

    def test_random_tarball_is_not_index(self, bare_cache):
        alpine_patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"]
        assert not bare_cache.is_mutable_file(
            "some/path/archive.tar.gz", alpine_patterns
        )

    def test_apkindex_signature_file_is_not_index(self, bare_cache):
        # Signature file adjacent to the index should not be treated as an index
        alpine_patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"]
        assert not bare_cache.is_mutable_file(
            "alpine/v3.18/x86_64/APKINDEX.tar.gz.sig", alpine_patterns
        )

    def test_apkindex_tmp_file_is_not_index(self, bare_cache):
        alpine_patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"]
        assert not bare_cache.is_mutable_file(
            "alpine/v3.18/x86_64/APKINDEX.tar.gz.tmp", alpine_patterns
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# is_mutable_file — rpm patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIsMutableFileRpm:
    """``is_mutable_file`` checked against the ``rpm`` package patterns."""

    def test_repomd_xml_is_index(self, bare_cache):
        rpm_patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"]
        assert bare_cache.is_mutable_file(
            "almalinux/9/x86_64/repomd.xml", rpm_patterns
        )

    def test_repodata_primary_xml_gz_is_index(self, bare_cache):
        rpm_patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"]
        assert bare_cache.is_mutable_file(
            "repo/repodata/primary.xml.gz", rpm_patterns
        )

    def test_repodata_sqlite_is_index(self, bare_cache):
        rpm_patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"]
        assert bare_cache.is_mutable_file(
            "repo/repodata/primary.sqlite", rpm_patterns
        )

    def test_repodata_sqlite_bz2_is_index(self, bare_cache):
        rpm_patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"]
        assert bare_cache.is_mutable_file(
            "repo/repodata/other.sqlite.bz2", rpm_patterns
        )

    def test_repodata_yaml_xz_is_index(self, bare_cache):
        rpm_patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"]
        assert bare_cache.is_mutable_file(
            "repo/repodata/comps.yaml.xz", rpm_patterns
        )

    def test_packages_gz_pattern_matches_any_path(self, bare_cache):
        # The Packages.gz$ regex is a carryover from the original hardcoded
        # logic and deliberately matches any path ending in Packages.gz,
        # including Debian-style paths.
        # This test documents that intentional behaviour.
        rpm_patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"]
        assert bare_cache.is_mutable_file(
            "debian/dists/stable/main/binary-amd64/Packages.gz", rpm_patterns
        )

    def test_rpm_package_is_not_index(self, bare_cache):
        rpm_patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"]
        assert not bare_cache.is_mutable_file(
            "almalinux/9/x86_64/Packages/bash-5.1.8.x86_64.rpm", rpm_patterns
        )

    def test_arbitrary_xml_outside_repodata_is_not_index(self, bare_cache):
        rpm_patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"]
        assert not bare_cache.is_mutable_file(
            "some/path/config.xml", rpm_patterns
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# is_mutable_file — docker patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIsMutableFileDocker:
    """``is_mutable_file`` checked against the ``docker`` package patterns."""

    def test_tag_manifest_is_index(self, bare_cache):
        docker_patterns = _PACKAGE_MUTABLE_PATTERNS["docker"]
        assert bare_cache.is_mutable_file(
            "library/nginx/manifests/latest", docker_patterns
        )

    def test_version_tag_manifest_is_index(self, bare_cache):
        docker_patterns = _PACKAGE_MUTABLE_PATTERNS["docker"]
        assert bare_cache.is_mutable_file(
            "library/nginx/manifests/1.25.3", docker_patterns
        )

    def test_hyphenated_tag_manifest_is_index(self, bare_cache):
        docker_patterns = _PACKAGE_MUTABLE_PATTERNS["docker"]
        assert bare_cache.is_mutable_file(
            "library/nginx/manifests/latest-rc", docker_patterns
        )

    def test_numeric_date_tag_manifest_is_index(self, bare_cache):
        docker_patterns = _PACKAGE_MUTABLE_PATTERNS["docker"]
        assert bare_cache.is_mutable_file(
            "library/nginx/manifests/20240101", docker_patterns
        )

    def test_digest_manifest_is_not_index(self, bare_cache):
        # A manifest addressed by digest, as opposed to by tag.
        docker_patterns = _PACKAGE_MUTABLE_PATTERNS["docker"]
        digest = "sha256:" + "a" * 64
        assert not bare_cache.is_mutable_file(
            f"library/nginx/manifests/{digest}", docker_patterns
        )

    def test_tags_list_is_index(self, bare_cache):
        docker_patterns = _PACKAGE_MUTABLE_PATTERNS["docker"]
        assert bare_cache.is_mutable_file(
            "library/nginx/tags/list", docker_patterns
        )

    def test_blob_is_not_index(self, bare_cache):
        docker_patterns = _PACKAGE_MUTABLE_PATTERNS["docker"]
        assert not bare_cache.is_mutable_file(
            "library/nginx/blobs/sha256:abc123", docker_patterns
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# is_mutable_file — edge cases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIsMutableFileEdgeCases:
    """``is_mutable_file`` with empty, ``None`` and caller-supplied patterns."""

    def test_empty_patterns_nothing_is_index(self, bare_cache):
        no_patterns = []
        assert not bare_cache.is_mutable_file("APKINDEX.tar.gz", no_patterns)
        assert not bare_cache.is_mutable_file("repomd.xml", no_patterns)
        assert not bare_cache.is_mutable_file(
            "library/nginx/manifests/latest", no_patterns
        )

    def test_none_patterns_nothing_is_index(self, bare_cache):
        assert not bare_cache.is_mutable_file("APKINDEX.tar.gz", None)
        assert not bare_cache.is_mutable_file("repomd.xml", None)

    def test_custom_patterns_match(self, bare_cache):
        custom = [r"metadata\.json$", r"index\.yaml$"]
        assert bare_cache.is_mutable_file("repo/metadata.json", custom)
        assert bare_cache.is_mutable_file("repo/subdir/index.yaml", custom)
        assert not bare_cache.is_mutable_file("repo/data.tar.gz", custom)

    def test_custom_pattern_does_not_match_standard_index(self, bare_cache):
        # Only the supplied patterns are consulted for this call.
        custom = [r"metadata\.json$"]
        assert not bare_cache.is_mutable_file("APKINDEX.tar.gz", custom)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_index_cache_key
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetIndexCacheKey:
    """Format and uniqueness of the keys built by ``get_index_cache_key``."""

    def test_key_format_is_deterministic(self, bare_cache):
        # Recompute the expected hash here to pin the hash algorithm,
        # truncation length, and format string in one assertion.
        path = "alpine/v3.18/x86_64/APKINDEX.tar.gz"
        expected_hash = hashlib.sha256(path.encode()).hexdigest()[:16]
        key = bare_cache.get_index_cache_key("alpine-test", path)
        assert key == f"index:alpine-test:{expected_hash}"

    def test_different_paths_produce_different_keys(self, bare_cache):
        key_318 = bare_cache.get_index_cache_key(
            "alpine-test", "alpine/v3.18/x86_64/APKINDEX.tar.gz"
        )
        key_319 = bare_cache.get_index_cache_key(
            "alpine-test", "alpine/v3.19/x86_64/APKINDEX.tar.gz"
        )
        assert key_318 != key_319

    def test_different_remotes_produce_different_keys(self, bare_cache):
        key_a = bare_cache.get_index_cache_key("remote-a", "path/to/APKINDEX.tar.gz")
        key_b = bare_cache.get_index_cache_key("remote-b", "path/to/APKINDEX.tar.gz")
        assert key_a != key_b

    def test_key_starts_with_index_prefix_and_remote(self, bare_cache):
        key = bare_cache.get_index_cache_key("myremote", "some/path")
        assert key.startswith("index:myremote:")

    def test_key_hash_segment_is_16_chars(self, bare_cache):
        key = bare_cache.get_index_cache_key("myremote", "some/path/file.xml")
        # Format: index:<remote>:<16-char hash>; the fixed length matters
        # for key-space hygiene.
        segments = key.split(":")
        assert len(segments) == 3
        assert len(segments[2]) == 16
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# mark_index_cached / is_index_valid
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIndexValidity:
    """``mark_index_cached`` / ``is_index_valid`` with and without Redis."""

    def test_mark_index_cached_calls_setex_with_correct_ttl(
        self, cache_with_redis, mock_redis_client
    ):
        cache_with_redis.mark_index_cached("remote", "path/APKINDEX.tar.gz", 300)
        expected_key = cache_with_redis.get_index_cache_key(
            "remote", "path/APKINDEX.tar.gz"
        )
        # The stored value is not part of the contract under test, hence ANY.
        mock_redis_client.setex.assert_called_once_with(expected_key, 300, ANY)

    def test_present_key_is_valid(self, cache_with_redis, mock_redis_client):
        mock_redis_client.exists.return_value = 1
        assert cache_with_redis.is_index_valid("remote", "path/APKINDEX.tar.gz")

    def test_missing_key_is_not_valid(self, cache_with_redis, mock_redis_client):
        mock_redis_client.exists.return_value = 0
        assert not cache_with_redis.is_index_valid("remote", "path/APKINDEX.tar.gz")

    def test_unavailable_redis_is_not_valid(self, unavailable_cache):
        assert not unavailable_cache.is_index_valid("remote", "some/path")

    def test_mark_cached_no_op_when_unavailable(self, unavailable_cache):
        # client is None when Redis is unavailable, so setex cannot be called
        assert unavailable_cache.client is None
        # must not raise
        unavailable_cache.mark_index_cached("remote", "some/path", 300)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# mutable meta (ETag / Last-Modified storage)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMutableMeta:
    """Checks for the mutable-meta (ETag / Last-Modified) key format and store/get/delete calls."""

    def test_meta_key_format(self, bare_cache):
        """The key is ``mutable:meta:<remote>:<first 16 hex chars of sha256(path)>``."""
        artifact_path = "repo/metadata.json"
        digest_prefix = hashlib.sha256(artifact_path.encode()).hexdigest()[:16]
        wanted = f"mutable:meta:myremote:{digest_prefix}"
        assert bare_cache.get_mutable_meta_key("myremote", artifact_path) == wanted

    def test_meta_key_hash_is_16_chars(self, bare_cache):
        """The last colon-separated segment of the key is 16 characters long."""
        meta_key = bare_cache.get_mutable_meta_key("remote", "some/path/file.json")
        hash_segment = meta_key.split(":")[-1]
        assert len(hash_segment) == 16

    def test_store_and_retrieve_etag(self, cache_with_redis, mock_redis_client):
        """Storing an ETag calls HSET once; reading returns what HGETALL yields."""
        etag = '"abc123"'
        mock_redis_client.hgetall.return_value = {"etag": etag}
        cache_with_redis.store_mutable_meta("remote", "path/meta.json", etag, None)
        mock_redis_client.hset.assert_called_once()
        fetched = cache_with_redis.get_mutable_meta("remote", "path/meta.json")
        assert fetched["etag"] == etag

    def test_store_and_retrieve_last_modified(self, cache_with_redis, mock_redis_client):
        """A Last-Modified value comes back under the ``last_modified`` key."""
        stamp = "Mon, 01 Jan 2024 00:00:00 GMT"
        mock_redis_client.hgetall.return_value = {"last_modified": stamp}
        cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, stamp)
        fetched = cache_with_redis.get_mutable_meta("remote", "path/meta.json")
        assert fetched["last_modified"] == stamp

    def test_store_no_op_when_both_none(self, cache_with_redis, mock_redis_client):
        """With neither ETag nor Last-Modified, HSET is never called."""
        cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, None)
        mock_redis_client.hset.assert_not_called()

    def test_store_no_op_when_unavailable(self, unavailable_cache):
        """Storing without Redis must not raise."""
        unavailable_cache.store_mutable_meta("remote", "path", "etag", None)  # must not raise

    def test_get_returns_empty_when_unavailable(self, unavailable_cache):
        """Reading without Redis returns an empty dict."""
        fetched = unavailable_cache.get_mutable_meta("remote", "path")
        assert fetched == {}

    def test_delete_removes_meta_key(self, cache_with_redis, mock_redis_client):
        """Deleting issues exactly one DELETE on the meta key."""
        meta_key = cache_with_redis.get_mutable_meta_key("remote", "path/meta.json")
        cache_with_redis.delete_mutable_meta("remote", "path/meta.json")
        mock_redis_client.delete.assert_called_once_with(meta_key)

    def test_delete_no_op_when_unavailable(self, unavailable_cache):
        """Deleting without Redis must not raise."""
        unavailable_cache.delete_mutable_meta("remote", "path")  # must not raise
|
||||
@@ -1,353 +0,0 @@
|
||||
"""Tests for ConfigManager, focusing on get_mutable_patterns and get_immutable_patterns."""
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from artifactapi.config import ConfigManager
|
||||
|
||||
|
||||
@pytest.fixture
def make_config(tmp_path):
    """Return a factory that turns a remotes mapping into a ConfigManager.

    The factory writes ``{"remotes": remotes_dict}`` as YAML to
    ``remotes.yaml`` under ``tmp_path`` and builds the ConfigManager from
    that file's path.
    """

    def _build(remotes_dict):
        document = {"remotes": remotes_dict}
        target = tmp_path / "remotes.yaml"
        target.write_text(yaml.dump(document))
        return ConfigManager(str(target))

    return _build
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_mutable_patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetMutablePatterns:
    """Checks for ``get_mutable_patterns``: package defaults plus configured extra patterns."""

    @staticmethod
    def _remote(package=None, base_url="https://x.com", mutable_patterns=None):
        """Return one remote's config mapping, leaving out optional keys that are None."""
        entry = {"type": "remote", "base_url": base_url}
        if package is not None:
            entry["package"] = package
        if mutable_patterns is not None:
            entry["mutable_patterns"] = mutable_patterns
        return entry

    @staticmethod
    def _any_match(patterns, text):
        """Report whether at least one regex in *patterns* matches somewhere in *text*."""
        import re

        return any(re.search(pattern, text) for pattern in patterns)

    def test_alpine_returns_package_defaults(self, make_config):
        """An alpine remote includes the APKINDEX pattern."""
        manager = make_config({"r": self._remote("alpine")})
        found = manager.get_mutable_patterns("r")
        assert r"APKINDEX\.tar\.gz$" in found

    def test_rpm_returns_package_defaults(self, make_config):
        """An rpm remote includes repomd.xml and a repodata pattern."""
        manager = make_config({"r": self._remote("rpm")})
        found = manager.get_mutable_patterns("r")
        assert r"repomd\.xml$" in found
        assert any("repodata" in entry for entry in found)

    def test_docker_returns_package_defaults(self, make_config):
        """A docker remote includes manifests and tags/list patterns."""
        manager = make_config({"r": self._remote("docker")})
        found = manager.get_mutable_patterns("r")
        assert any("manifests" in entry for entry in found)
        assert any("tags/list" in entry for entry in found)

    def test_generic_returns_empty_list(self, make_config):
        """A generic remote has no default patterns."""
        manager = make_config({"r": self._remote("generic")})
        assert manager.get_mutable_patterns("r") == []

    def test_unknown_remote_returns_empty_list(self, make_config):
        """A remote missing from the config gives an empty list."""
        manager = make_config({})
        assert manager.get_mutable_patterns("nonexistent") == []

    def test_missing_package_field_defaults_to_generic(self, make_config):
        """A remote without a ``package`` key gives an empty list."""
        manager = make_config({"r": self._remote()})
        assert manager.get_mutable_patterns("r") == []

    def test_unknown_package_type_returns_empty_list(self, make_config):
        """An unrecognised package type gives an empty list."""
        # A mis-spelled package type silently returns [] — this is a known footgun
        manager = make_config({"r": self._remote("deb")})
        assert manager.get_mutable_patterns("r") == []

    def test_extra_patterns_appended_after_defaults(self, make_config):
        """Configured patterns are present and come after the package defaults."""
        default_pattern = r"APKINDEX\.tar\.gz$"
        extra_pattern = r"custom\.json$"
        manager = make_config({"r": self._remote("alpine", mutable_patterns=[r"custom\.json$"])})
        found = manager.get_mutable_patterns("r")
        assert default_pattern in found
        assert extra_pattern in found
        # Defaults come first
        assert found.index(default_pattern) < found.index(extra_pattern)

    def test_explicit_empty_extra_patterns_returns_defaults(self, make_config):
        """An explicit empty ``mutable_patterns`` list still yields the defaults."""
        manager = make_config({"r": self._remote("alpine", mutable_patterns=[])})
        assert r"APKINDEX\.tar\.gz$" in manager.get_mutable_patterns("r")

    def test_duplicate_extra_pattern_not_added_twice(self, make_config):
        """Configuring a pattern that is already a default does not duplicate it."""
        existing = r"APKINDEX\.tar\.gz$"
        manager = make_config({"r": self._remote("alpine", mutable_patterns=[existing])})
        found = manager.get_mutable_patterns("r")
        assert found.count(existing) == 1

    def test_generic_with_only_extra_patterns(self, make_config):
        """A generic remote returns exactly its configured patterns, in order."""
        manager = make_config(
            {"r": self._remote("generic", mutable_patterns=[r"meta\.json$", r"index\.yaml$"])}
        )
        assert manager.get_mutable_patterns("r") == [r"meta\.json$", r"index\.yaml$"]

    def test_rpm_extra_patterns_merged(self, make_config):
        """rpm defaults and configured patterns are both present."""
        manager = make_config({"r": self._remote("rpm", mutable_patterns=[r"custom-meta\.xml$"])})
        found = manager.get_mutable_patterns("r")
        assert r"repomd\.xml$" in found
        assert r"custom-meta\.xml$" in found

    def test_npm_has_no_package_defaults(self, make_config):
        """An npm remote with no configured patterns gives an empty list."""
        manager = make_config({"r": self._remote("npm")})
        assert manager.get_mutable_patterns("r") == []

    def test_npm_explicit_mutable_pattern_matches_metadata(self, make_config):
        """The configured not-a-.tgz pattern matches package metadata paths."""
        manager = make_config({"r": self._remote("npm", mutable_patterns=[r"^(?!.*\.tgz$).*"])})
        found = manager.get_mutable_patterns("r")
        assert self._any_match(found, "express")
        assert self._any_match(found, "@babel/core")

    def test_helm_returns_index_yaml_as_mutable(self, make_config):
        """A helm remote includes the index.yaml pattern."""
        manager = make_config({"r": self._remote("helm", base_url="https://helm.example.com")})
        found = manager.get_mutable_patterns("r")
        assert r"index\.yaml$" in found

    def test_helm_chart_tarballs_not_mutable_by_default(self, make_config):
        """No helm default pattern matches a chart tarball name."""
        manager = make_config({"r": self._remote("helm", base_url="https://helm.example.com")})
        found = manager.get_mutable_patterns("r")
        # Only index.yaml is mutable; .tgz chart tarballs are not
        assert not self._any_match(found, "vault-0.29.1.tgz")
        assert not self._any_match(found, "consul-1.5.0.tgz")

    def test_npm_explicit_mutable_pattern_excludes_tarballs(self, make_config):
        """The configured not-a-.tgz pattern does not match tarball paths."""
        manager = make_config({"r": self._remote("npm", mutable_patterns=[r"^(?!.*\.tgz$).*"])})
        found = manager.get_mutable_patterns("r")
        assert not self._any_match(found, "express-4.18.2.tgz")
        assert not self._any_match(found, "express/-/express-4.18.2.tgz")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_immutable_patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetImmutablePatterns:
    """Checks for ``get_immutable_patterns`` at remote level and per repository."""

    @staticmethod
    def _remote(package="generic", immutable_patterns=None, repositories=None):
        """Return one remote's config mapping, leaving out optional keys that are None."""
        entry = {"type": "remote", "package": package, "base_url": "https://x.com"}
        if immutable_patterns is not None:
            entry["immutable_patterns"] = immutable_patterns
        if repositories is not None:
            entry["repositories"] = repositories
        return entry

    @classmethod
    def _remote_with_repo_override(cls):
        """Remote-level tarball pattern plus an rpm-only override for ``/path/to/repo``."""
        return cls._remote(
            immutable_patterns=[r".*\.tar\.gz$"],
            repositories={"/path/to/repo": {"immutable_patterns": [r".*\.rpm$"]}},
        )

    def test_returns_immutable_patterns(self, make_config):
        """The configured remote-level patterns are returned as-is."""
        manager = make_config({"r": self._remote(immutable_patterns=[r".*\.tar\.gz$"])})
        assert manager.get_immutable_patterns("r") == [r".*\.tar\.gz$"]

    def test_returns_empty_for_missing_remote(self, make_config):
        """A remote missing from the config gives an empty list."""
        manager = make_config({})
        assert manager.get_immutable_patterns("nonexistent") == []

    def test_returns_empty_when_no_patterns_configured(self, make_config):
        """A remote without ``immutable_patterns`` gives an empty list."""
        manager = make_config({"r": self._remote()})
        assert manager.get_immutable_patterns("r") == []

    def test_multiple_patterns_returned(self, make_config):
        """Several configured patterns come back in the same order."""
        patterns = [r".*\.rpm$", r".*/repodata/.*$"]
        manager = make_config({"r": self._remote(package="rpm", immutable_patterns=patterns)})
        assert manager.get_immutable_patterns("r") == patterns

    def test_dict_keyed_repositories_returns_per_repo_patterns(self, make_config):
        """A path listed under ``repositories`` uses that repository's patterns."""
        manager = make_config({"r": self._remote_with_repo_override()})
        assert manager.get_immutable_patterns("r", "/path/to/repo") == [r".*\.rpm$"]

    def test_dict_keyed_repositories_falls_back_to_remote_patterns(self, make_config):
        """A path not listed under ``repositories`` uses the remote-level patterns."""
        manager = make_config({"r": self._remote_with_repo_override()})
        assert manager.get_immutable_patterns("r", "/unknown/path") == [r".*\.tar\.gz$"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_user_mutable_patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetUserMutablePatterns:
    """Checks for ``get_user_mutable_patterns``: only patterns written in the config."""

    @staticmethod
    def _config(package, mutable_patterns=None):
        """Return a one-remote config (remote ``r``), omitting patterns when None."""
        entry = {"type": "remote", "package": package, "base_url": "https://x.com"}
        if mutable_patterns is not None:
            entry["mutable_patterns"] = mutable_patterns
        return {"r": entry}

    def test_returns_only_user_patterns(self, make_config):
        """Only the configured pattern is returned, even for an alpine remote."""
        manager = make_config(self._config("alpine", [r"custom\.json$"]))
        assert manager.get_user_mutable_patterns("r") == [r"custom\.json$"]

    def test_excludes_package_defaults(self, make_config):
        """An alpine remote with no configured patterns gives an empty list."""
        # Package defaults (APKINDEX etc.) must NOT appear here
        manager = make_config(self._config("alpine"))
        assert manager.get_user_mutable_patterns("r") == []

    def test_returns_empty_for_missing_remote(self, make_config):
        """A remote missing from the config gives an empty list."""
        manager = make_config({})
        assert manager.get_user_mutable_patterns("nonexistent") == []

    def test_returns_empty_when_key_absent(self, make_config):
        """A remote without ``mutable_patterns`` gives an empty list."""
        manager = make_config(self._config("generic"))
        assert manager.get_user_mutable_patterns("r") == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_cache_config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetCacheConfig:
    """Checks for ``get_cache_config``: the remote's ``cache`` mapping, or an empty dict."""

    @staticmethod
    def _generic_remote(**extra):
        """Return a generic remote entry extended with any *extra* keys."""
        return {"type": "remote", "package": "generic", "base_url": "https://x.com", **extra}

    def test_returns_cache_section(self, make_config):
        """The configured ``cache`` mapping is returned unchanged."""
        manager = make_config(
            {"r": self._generic_remote(cache={"immutable_ttl": 0, "mutable_ttl": 7200})}
        )
        assert manager.get_cache_config("r") == {"immutable_ttl": 0, "mutable_ttl": 7200}

    def test_returns_empty_dict_for_missing_remote(self, make_config):
        """A remote missing from the config gives an empty dict."""
        manager = make_config({})
        assert manager.get_cache_config("nonexistent") == {}

    def test_returns_empty_dict_when_no_cache_key(self, make_config):
        """A remote without a ``cache`` key gives an empty dict."""
        manager = make_config({"r": self._generic_remote()})
        assert manager.get_cache_config("r") == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config file reload
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestConfigReload:
    """Checks for ``ConfigManager._check_reload`` against the config file's mtime."""

    @staticmethod
    def _write_single_remote(cfg_file, name, base_url):
        """Overwrite *cfg_file* with a config holding one generic remote called *name*."""
        remote = {"type": "remote", "package": "generic", "base_url": base_url}
        cfg_file.write_text(yaml.dump({"remotes": {name: remote}}))

    def test_reloads_when_file_mtime_advances(self, tmp_path):
        """Rewriting the file and advancing its mtime makes the new remotes visible."""
        cfg_file = tmp_path / "remotes.yaml"
        self._write_single_remote(cfg_file, "repo-a", "https://x.com")
        manager = ConfigManager(str(cfg_file))
        assert "repo-a" in manager.config["remotes"]

        self._write_single_remote(cfg_file, "repo-b", "https://y.com")
        # Set the mtime explicitly so the test does not depend on clock resolution.
        bumped = manager._last_modified + 1
        os.utime(str(cfg_file), (bumped, bumped))

        manager._check_reload()

        loaded = manager.config["remotes"]
        assert "repo-b" in loaded
        assert "repo-a" not in loaded

    def test_no_reload_when_file_unchanged(self, tmp_path):
        """Calling ``_check_reload`` on an untouched file keeps the original remote."""
        cfg_file = tmp_path / "remotes.yaml"
        self._write_single_remote(cfg_file, "repo-a", "https://x.com")
        manager = ConfigManager(str(cfg_file))

        # Call check_reload without touching the file — should not reload
        manager._check_reload()

        assert "repo-a" in manager.config["remotes"]
|
||||
@@ -1,273 +0,0 @@
|
||||
"""Tests for docker_auth: WWW-Authenticate parsing and token caching."""
|
||||
|
||||
import time
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from artifactapi import docker_auth
|
||||
from artifactapi.docker_auth import (
|
||||
_cache_key,
|
||||
_get_cached_token,
|
||||
_store_token,
|
||||
fetch_token,
|
||||
get_docker_token_for_response,
|
||||
parse_www_authenticate,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clear_token_cache():
    """Empty ``docker_auth._token_cache`` before and after every test in this module."""

    def _wipe():
        # Look the attribute up on each call rather than binding it once.
        docker_auth._token_cache.clear()

    _wipe()
    yield
    _wipe()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# parse_www_authenticate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestParseWwwAuthenticate:
    """Checks for ``parse_www_authenticate``: a (realm, service, scope) triple, or None."""

    def test_full_bearer_header(self):
        """All three fields are extracted from a complete Bearer challenge."""
        challenge = 'Bearer realm="https://auth.docker.io/token",service="registry.docker.io",scope="repository:library/nginx:pull"'
        parsed = parse_www_authenticate(challenge)
        assert parsed is not None
        got_realm, got_service, got_scope = parsed
        assert got_realm == "https://auth.docker.io/token"
        assert got_service == "registry.docker.io"
        assert got_scope == "repository:library/nginx:pull"

    def test_realm_only(self):
        """Absent service and scope come back as empty strings."""
        challenge = 'Bearer realm="https://auth.example.com/token"'
        parsed = parse_www_authenticate(challenge)
        assert parsed is not None
        got_realm, got_service, got_scope = parsed
        assert got_realm == "https://auth.example.com/token"
        assert got_service == ""
        assert got_scope == ""

    def test_realm_and_service_only(self):
        """An absent scope comes back as an empty string."""
        challenge = 'Bearer realm="https://auth.example.com",service="registry.example.com"'
        parsed = parse_www_authenticate(challenge)
        assert parsed is not None
        _realm, got_service, got_scope = parsed
        assert got_service == "registry.example.com"
        assert got_scope == ""

    def test_invalid_scheme_returns_none(self):
        """A Basic challenge is rejected."""
        parsed = parse_www_authenticate('Basic realm="example"')
        assert parsed is None

    def test_empty_header_returns_none(self):
        """An empty header is rejected."""
        parsed = parse_www_authenticate("")
        assert parsed is None

    def test_case_insensitive_bearer_parses_realm(self):
        """A lower-case ``bearer`` scheme is accepted."""
        challenge = 'bearer realm="https://auth.example.com/token"'
        parsed = parse_www_authenticate(challenge)
        assert parsed is not None
        got_realm, _service, _scope = parsed
        assert got_realm == "https://auth.example.com/token"

    def test_field_order_scope_before_service_drops_service(self):
        """When scope precedes service, realm and scope parse but service is empty."""
        # The regex requires realm,service,scope order; scope before service
        # results in service being silently dropped. This test documents the known limitation.
        challenge = 'Bearer realm="https://auth.example.com",scope="repo:pull",service="svc"'
        parsed = parse_www_authenticate(challenge)
        assert parsed is not None
        got_realm, got_service, got_scope = parsed
        assert got_realm == "https://auth.example.com"
        assert got_scope == "repo:pull"
        assert got_service == ""  # silently dropped when out of order
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _cache_key
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCacheKey:
    """Checks for ``_cache_key``, which builds the token-cache key string."""

    def test_key_contains_all_components(self):
        """Realm, service, scope and username all appear in the key."""
        components = ("https://realm.com", "svc", "scope", "user")
        built = _cache_key(*components)
        for component in components:
            assert component in built

    def test_none_username_uses_empty_string(self):
        """With a None username the key ends in the bare separator."""
        built = _cache_key("https://realm.com", "svc", "scope", None)
        assert built.endswith("|")

    def test_different_services_give_different_keys(self):
        """Changing only the service changes the key."""
        first = _cache_key("realm", "svc1", "scope", None)
        second = _cache_key("realm", "svc2", "scope", None)
        assert first != second

    def test_different_scopes_give_different_keys(self):
        """Changing only the scope changes the key."""
        first = _cache_key("realm", "svc", "scope:read", None)
        second = _cache_key("realm", "svc", "scope:write", None)
        assert first != second

    def test_pipe_in_field_value_can_collide_with_adjacent_fields(self):
        """A pipe embedded in one field produces the same key as a field boundary."""
        # The "|" separator is not escaped, so a pipe embedded in one field
        # produces the same key as the same pipe appearing as a separator boundary.
        # This is a known limitation: _cache_key("a|b","c","d",None) ==
        # _cache_key("a","b|c","d",None). Documents the behaviour, not a claim it's correct.
        pipe_in_realm = _cache_key("a|b", "c", "d", None)
        pipe_in_service = _cache_key("a", "b|c", "d", None)
        assert pipe_in_realm == pipe_in_service
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _get_cached_token / _store_token
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTokenCaching:
    """Checks for ``_get_cached_token`` and ``_store_token`` on the module-level cache."""

    def test_get_returns_none_when_not_cached(self):
        """An unknown key gives None."""
        looked_up = _get_cached_token("no-such-key")
        assert looked_up is None

    def test_get_returns_token_when_valid(self):
        """A freshly stored token is returned."""
        _store_token("mykey", "tok-abc", 300)
        looked_up = _get_cached_token("mykey")
        assert looked_up == "tok-abc"

    def test_get_returns_none_when_expired(self):
        """An entry whose expiry is in the past gives None."""
        already_expired = time.time() - 1
        docker_auth._token_cache["mykey"] = ("old-token", already_expired)
        looked_up = _get_cached_token("mykey")
        assert looked_up is None

    def test_expired_entry_is_removed_from_cache(self):
        """Looking up an expired entry removes it from the cache."""
        already_expired = time.time() - 1
        docker_auth._token_cache["mykey"] = ("old-token", already_expired)
        _get_cached_token("mykey")
        assert "mykey" not in docker_auth._token_cache

    def test_store_expires_30s_before_stated_time(self):
        """The stored expiry is ``expires_in`` minus 30 seconds from now."""
        started = time.time()
        _store_token("mykey", "tok", 100)
        _token, expiry = docker_auth._token_cache["mykey"]
        # expires_in - 30 = 70; allow ±2 s clock wiggle
        assert started + 68 <= expiry <= started + 72

    def test_store_enforces_minimum_10s_expiry(self):
        """A very short ``expires_in`` is still cached for at least 10 seconds."""
        started = time.time()
        _store_token("mykey", "tok", 5)  # expires_in - 30 would be negative
        _token, expiry = docker_auth._token_cache["mykey"]
        assert expiry >= started + 10
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# fetch_token (async, mocks httpx)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_mock_http_client(token_payload: dict):
    """Build a mock async HTTP client whose GET response carries *token_payload*.

    Returns ``(ctx, client)``. ``ctx`` is an async-context-manager mock that
    enters to ``client``. Awaiting ``client.get(...)`` gives a response mock
    whose ``json()`` returns *token_payload* and whose ``raise_for_status``
    is a plain mock.
    """
    response = MagicMock(raise_for_status=MagicMock())
    response.json.return_value = token_payload

    client = AsyncMock()
    client.get = AsyncMock(return_value=response)

    ctx = MagicMock()
    ctx.__aenter__ = AsyncMock(return_value=client)
    # A falsy __aexit__ result means exceptions from the body are not swallowed.
    ctx.__aexit__ = AsyncMock(return_value=False)
    return ctx, client
|
||||
|
||||
|
||||
class TestFetchToken:
    """Checks for ``fetch_token`` with ``httpx.AsyncClient`` patched out."""

    # (realm, service, scope) shared by every call in this class.
    _TARGET = ("https://auth.example.com", "svc", "scope")

    async def test_returns_token_field(self):
        """The ``token`` field of the response is returned."""
        ctx, _client = _make_mock_http_client({"token": "bearer-tok", "expires_in": 300})
        with patch("httpx.AsyncClient", return_value=ctx):
            received = await fetch_token(*self._TARGET)
        assert received == "bearer-tok"

    async def test_falls_back_to_access_token_field(self):
        """``access_token`` is used when ``token`` is absent."""
        ctx, _client = _make_mock_http_client({"access_token": "access-tok", "expires_in": 300})
        with patch("httpx.AsyncClient", return_value=ctx):
            received = await fetch_token(*self._TARGET)
        assert received == "access-tok"

    async def test_returns_none_when_response_missing_token_field(self):
        """A response with neither token field gives None."""
        ctx, _client = _make_mock_http_client({"not_token": "value", "expires_in": 300})
        with patch("httpx.AsyncClient", return_value=ctx):
            received = await fetch_token(*self._TARGET)
        assert received is None

    async def test_defaults_expires_in_to_300_when_missing(self):
        """Without ``expires_in`` the token is cached as if it were 300 seconds."""
        ctx, _client = _make_mock_http_client({"token": "tok"})  # no expires_in key
        started = time.time()
        with patch("httpx.AsyncClient", return_value=ctx):
            received = await fetch_token(*self._TARGET)
        assert received == "tok"
        entry_key = _cache_key(*self._TARGET, None)
        _token, expiry = docker_auth._token_cache[entry_key]
        # Default expires_in=300, stored as time.time() + max(300-30, 10) = 270
        assert started + 268 <= expiry <= started + 272

    async def test_uses_cache_on_second_call_without_http(self):
        """A second identical call returns the cached token without another GET."""
        ctx, http_client = _make_mock_http_client({"token": "cached-tok", "expires_in": 300})
        with patch("httpx.AsyncClient", return_value=ctx):
            await fetch_token(*self._TARGET)
            # Forget the first GET so only the second call is inspected.
            http_client.get.reset_mock()
            received = await fetch_token(*self._TARGET)
        http_client.get.assert_not_called()
        assert received == "cached-tok"

    async def test_returns_none_on_network_error(self):
        """An exception raised by GET gives None."""
        failing_client = AsyncMock()
        failing_client.get = AsyncMock(side_effect=Exception("connection refused"))
        ctx = MagicMock()
        ctx.__aenter__ = AsyncMock(return_value=failing_client)
        ctx.__aexit__ = AsyncMock(return_value=False)
        with patch("httpx.AsyncClient", return_value=ctx):
            received = await fetch_token(*self._TARGET)
        assert received is None

    async def test_returns_none_on_http_status_error(self):
        """An ``HTTPStatusError`` from ``raise_for_status`` gives None."""
        rejected = MagicMock()
        rejected.raise_for_status.side_effect = httpx.HTTPStatusError("401 Unauthorized", request=MagicMock(), response=MagicMock())
        http_client = AsyncMock()
        http_client.get = AsyncMock(return_value=rejected)
        ctx = MagicMock()
        ctx.__aenter__ = AsyncMock(return_value=http_client)
        ctx.__aexit__ = AsyncMock(return_value=False)
        with patch("httpx.AsyncClient", return_value=ctx):
            received = await fetch_token(*self._TARGET)
        assert received is None

    async def test_passes_credentials_as_auth_tuple(self):
        """Username and password reach GET as an ``auth`` tuple."""
        ctx, http_client = _make_mock_http_client({"token": "authed-tok", "expires_in": 300})
        with patch("httpx.AsyncClient", return_value=ctx):
            await fetch_token(*self._TARGET, "user", "pass")
        sent_kwargs = http_client.get.call_args.kwargs
        assert sent_kwargs.get("auth") == ("user", "pass")

    async def test_no_auth_when_no_credentials(self):
        """Without credentials, ``auth`` is absent or None."""
        ctx, http_client = _make_mock_http_client({"token": "anon-tok", "expires_in": 300})
        with patch("httpx.AsyncClient", return_value=ctx):
            await fetch_token(*self._TARGET)
        sent_kwargs = http_client.get.call_args.kwargs
        assert sent_kwargs.get("auth") is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_docker_token_for_response
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetDockerTokenForResponse:
    """Checks for ``get_docker_token_for_response``, from challenge header to token."""

    async def test_returns_none_for_non_bearer_header(self):
        """A Basic challenge gives None."""
        received = await get_docker_token_for_response('Basic realm="example"')
        assert received is None

    async def test_end_to_end_parse_and_fetch(self):
        """parse_www_authenticate → fetch_token wired together end-to-end."""
        challenge = 'Bearer realm="https://auth.example.com",service="svc",scope="repo:pull"'
        ctx, http_client = _make_mock_http_client({"token": "e2e-tok", "expires_in": 300})
        with patch("httpx.AsyncClient", return_value=ctx):
            received = await get_docker_token_for_response(challenge, "user", "pass")
        assert received == "e2e-tok"
        # Parsed service/scope go out as query params, credentials as auth.
        sent_kwargs = http_client.get.call_args.kwargs
        query = sent_kwargs["params"]
        assert query["service"] == "svc"
        assert query["scope"] == "repo:pull"
        assert sent_kwargs["auth"] == ("user", "pass")
|
||||
@@ -1,926 +0,0 @@
|
||||
"""FastAPI route tests using TestClient with mocked service dependencies."""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from unittest.mock import ANY, AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-test service mocks (replace module-level globals in main.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def mock_storage():
    """Return a MagicMock storage service preset to an empty store.

    ``exists`` returns False, ``get_object_key`` and ``download_object``
    return fixed values, ``bucket`` is ``"testbucket"`` and ``client`` is
    its own MagicMock.
    """
    storage_double = MagicMock(bucket="testbucket", client=MagicMock())
    storage_double.configure_mock(
        **{
            "get_object_key.return_value": "test-remote/abc123/file.ext",
            "exists.return_value": False,
            "download_object.return_value": b"fake content",
        }
    )
    return storage_double
|
||||
|
||||
|
||||
@pytest.fixture
def mock_cache():
    """Return a MagicMock cache service marked unavailable.

    ``available`` is False and ``client`` is None; ``is_mutable_file``
    returns False and ``is_index_valid`` returns True.
    """
    cache_double = MagicMock(available=False, client=None)
    cache_double.configure_mock(
        **{
            "is_mutable_file.return_value": False,
            "is_index_valid.return_value": True,
        }
    )
    return cache_double
|
||||
|
||||
|
||||
@pytest.fixture
def mock_database():
    """Return a MagicMock database service with ``available`` set to False."""
    database_double = MagicMock(available=False)
    return database_double
|
||||
|
||||
|
||||
@pytest.fixture
def mock_metrics():
    """Return an unconfigured MagicMock standing in for the metrics service."""
    metrics_double = MagicMock()
    return metrics_double
|
||||
|
||||
|
||||
@pytest.fixture
def patched_deps(mock_storage, mock_cache, mock_database, mock_metrics):
    """Replace the service globals in ``artifactapi.main`` with mocks for one test.

    Yields a dict of the mocks keyed by ``"storage"``, ``"cache"``,
    ``"database"`` and ``"metrics"``; the originals are restored when the
    ``with`` block exits.
    """
    import artifactapi.main as main_mod

    doubles = {
        "storage": mock_storage,
        "cache": mock_cache,
        "database": mock_database,
        "metrics": mock_metrics,
    }
    # Each key is also the attribute name on main_mod, so one patch.multiple
    # call swaps all four.
    with patch.multiple(main_mod, **doubles):
        yield doubles
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Basic / health endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBasicEndpoints:
    """Checks for the root, health and Docker v2 ping endpoints."""

    def test_root_returns_remote_list(self, client):
        """``GET /`` returns 200 with a non-empty ``remotes`` list."""
        reply = client.get("/")
        assert reply.status_code == 200
        body = reply.json()
        assert "remotes" in body
        remotes = body["remotes"]
        assert isinstance(remotes, list)
        assert len(remotes) > 0

    def test_root_contains_version(self, client):
        """``GET /`` includes a ``version`` key."""
        body = client.get("/").json()
        assert "version" in body

    def test_health_check(self, client):
        """``GET /health`` returns 200 with status ``healthy``."""
        reply = client.get("/health")
        assert reply.status_code == 200
        assert reply.json()["status"] == "healthy"

    def test_docker_v2_ping(self, client):
        """``GET /v2/`` returns 200, the registry API-version header and an empty JSON object."""
        reply = client.get("/v2/")
        assert reply.status_code == 200
        api_version = reply.headers.get("Docker-Distribution-Api-Version")
        assert api_version == "registry/2.0"
        assert reply.json() == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Docker proxy /v2/{remote}/{path}
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDockerProxy:
    """Tests for the Docker proxy route ``/v2/{remote}/{path}``."""

    OCI_MANIFEST_TYPE = "application/vnd.oci.image.manifest.v1+json"
    DOCKER_V2_MANIFEST_TYPE = "application/vnd.docker.distribution.manifest.v2+json"
    NGINX_MANIFEST_URL = "/v2/docker-test/library/nginx/manifests/latest"
    FETCH_TARGET = "artifactapi.main.cache_single_artifact"

    @staticmethod
    def _manifest_bytes(media_type=OCI_MANIFEST_TYPE, **extra_fields):
        """Return a minimal JSON-encoded image manifest as bytes.

        Args:
            media_type: Value stored under the ``mediaType`` key.
            **extra_fields: Additional top-level keys, emitted between
                ``mediaType`` and ``layers``.

        Returns:
            The UTF-8 encoded ``json.dumps`` output of the manifest dict.
        """
        return json.dumps({"mediaType": media_type, **extra_fields, "layers": []}).encode()

    def test_unknown_remote_returns_404(self, client, patched_deps):
        """A remote name that is not configured is answered with 404."""
        response = client.get("/v2/no-such-remote/library/nginx/manifests/latest")
        assert response.status_code == 404

    def test_non_docker_package_returns_400(self, client, patched_deps):
        """Using the Docker route against a non-docker remote is answered with 400."""
        # alpine-test is package: alpine, not docker
        response = client.get("/v2/alpine-test/library/nginx/manifests/latest")
        assert response.status_code == 400

    def test_pattern_blocked_returns_403(self, client, patched_deps):
        """An image outside the remote's allowed patterns is answered with 403."""
        # docker-restricted allows only "library/nginx"
        response = client.get("/v2/docker-restricted/library/ubuntu/manifests/latest")
        assert response.status_code == 403

    def test_allowed_pattern_proceeds_to_cache(self, client, patched_deps):
        """An allowed image on the restricted remote is served (200) from a valid cache entry."""
        deps = patched_deps
        deps["storage"].exists.return_value = True
        deps["storage"].download_object.return_value = self._manifest_bytes()
        deps["cache"].is_mutable_file.return_value = True
        deps["cache"].is_index_valid.return_value = True

        response = client.get("/v2/docker-restricted/library/nginx/manifests/latest")
        assert response.status_code == 200

    def test_cache_hit_manifest_returns_correct_content_type(self, client, patched_deps):
        """The response content type starts with the manifest's own ``mediaType``."""
        deps = patched_deps
        manifest = self._manifest_bytes(self.DOCKER_V2_MANIFEST_TYPE, schemaVersion=2)
        deps["storage"].exists.return_value = True
        deps["storage"].download_object.return_value = manifest
        deps["cache"].is_mutable_file.return_value = True
        deps["cache"].is_index_valid.return_value = True

        response = client.get(self.NGINX_MANIFEST_URL)
        assert response.status_code == 200
        ct = response.headers["content-type"]
        assert ct.startswith(self.DOCKER_V2_MANIFEST_TYPE)

    def test_cache_hit_sets_docker_content_digest_header(self, client, patched_deps):
        """``Docker-Content-Digest`` equals the sha256 digest of the stored manifest bytes."""
        deps = patched_deps
        manifest = self._manifest_bytes()
        deps["storage"].exists.return_value = True
        deps["storage"].download_object.return_value = manifest
        deps["cache"].is_mutable_file.return_value = True
        deps["cache"].is_index_valid.return_value = True

        response = client.get(self.NGINX_MANIFEST_URL)
        # Check the status first so a failed request is reported as such
        # instead of as a KeyError on the missing header.
        assert response.status_code == 200
        expected = f"sha256:{hashlib.sha256(manifest).hexdigest()}"
        assert response.headers["Docker-Content-Digest"] == expected

    def test_cache_hit_records_metrics(self, client, patched_deps):
        """A cache hit is recorded exactly once against the ``docker-test`` remote."""
        deps = patched_deps
        deps["storage"].exists.return_value = True
        deps["storage"].download_object.return_value = self._manifest_bytes()
        deps["cache"].is_mutable_file.return_value = False

        client.get(self.NGINX_MANIFEST_URL)
        deps["metrics"].record_cache_hit.assert_called_once_with("docker-test", ANY)

    def test_head_request_returns_no_body(self, client, patched_deps):
        """HEAD on a cached manifest returns 200 with an empty body."""
        deps = patched_deps
        deps["storage"].exists.return_value = True
        deps["storage"].download_object.return_value = self._manifest_bytes()
        deps["cache"].is_mutable_file.return_value = False

        response = client.head(self.NGINX_MANIFEST_URL)
        assert response.status_code == 200
        assert response.content == b""

    def test_cache_miss_calls_upstream_fetch(self, client, patched_deps):
        """When storage reports the object missing, ``cache_single_artifact`` is called once."""
        deps = patched_deps
        deps["storage"].exists.return_value = False
        deps["storage"].download_object.return_value = self._manifest_bytes()
        deps["cache"].is_mutable_file.return_value = True

        with patch(
            self.FETCH_TARGET,
            new_callable=AsyncMock,
            return_value={"status": "cached"},
        ) as mock_fetch:
            response = client.get(self.NGINX_MANIFEST_URL)

        mock_fetch.assert_called_once()
        assert response.status_code == 200

    def test_cache_miss_on_index_marks_index_cached(self, client, patched_deps):
        """After a successful fetch of a mutable file, ``mark_index_cached`` is called once."""
        deps = patched_deps
        deps["storage"].exists.return_value = False
        deps["storage"].download_object.return_value = self._manifest_bytes()
        deps["cache"].is_mutable_file.return_value = True

        with patch(
            self.FETCH_TARGET,
            new_callable=AsyncMock,
            return_value={"status": "cached"},
        ):
            response = client.get(self.NGINX_MANIFEST_URL)

        assert response.status_code == 200
        deps["cache"].mark_index_cached.assert_called_once()

    def test_index_expired_triggers_refetch(self, client, patched_deps):
        """A stored mutable manifest whose index is no longer valid is fetched again."""
        deps = patched_deps
        deps["storage"].exists.return_value = True  # cached in S3
        deps["cache"].is_mutable_file.return_value = True
        deps["cache"].is_index_valid.return_value = False  # but TTL expired
        deps["storage"].download_object.return_value = self._manifest_bytes()

        with patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=True):
            with patch(
                self.FETCH_TARGET,
                new_callable=AsyncMock,
                return_value={"status": "cached"},
            ) as mock_fetch:
                response = client.get(self.NGINX_MANIFEST_URL)

        mock_fetch.assert_called_once()
        assert response.status_code == 200
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Generic artifact route /api/v1/remote/{remote}/{path}
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGenericArtifactRoute:
    """Tests for the generic artifact route ``/api/v1/remote/{remote}/{path}``."""

    ARCHIVE_URL = "/api/v1/remote/generic-test/some/path/archive.tar.gz"
    APKINDEX_URL = "/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz"
    CHECK_MUTABLE_URL = "/api/v1/remote/check-mutable-test/metadata.json"
    FETCH_TARGET = "artifactapi.main.cache_single_artifact"
    CHECK_TARGET = "artifactapi.main.check_upstream_changed"
    REACHABLE_TARGET = "artifactapi.main._upstream_reachable"

    def test_unknown_remote_returns_404(self, client, patched_deps):
        """A remote name that is not configured is answered with 404."""
        resp = client.get("/api/v1/remote/nonexistent/path/to/file.tar.gz")
        assert resp.status_code == 404

    def test_pattern_blocked_returns_403(self, client, patched_deps):
        """A path outside the remote's allowed patterns is answered with 403."""
        # generic-test only allows .tar.gz
        resp = client.get("/api/v1/remote/generic-test/some/path/file.rpm")
        assert resp.status_code == 403

    def test_cache_hit_returns_200_with_source_header(self, client, patched_deps):
        """A stored artifact is returned verbatim with ``X-Artifact-Source: cache``."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"tar content"
        cache.is_mutable_file.return_value = False

        resp = client.get(self.ARCHIVE_URL)

        assert resp.status_code == 200
        assert resp.headers["X-Artifact-Source"] == "cache"
        assert resp.content == b"tar content"

    def test_cache_hit_sets_content_disposition(self, client, patched_deps):
        """The ``content-disposition`` header marks an attachment named after the file."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"content"
        cache.is_mutable_file.return_value = False

        resp = client.get(self.ARCHIVE_URL)

        disposition = resp.headers["content-disposition"]
        assert "attachment" in disposition
        assert "archive.tar.gz" in disposition

    def test_cache_hit_sets_artifact_size_header(self, client, patched_deps):
        """``X-Artifact-Size`` carries the byte length of the stored payload."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        content = b"some artifact content bytes"
        storage.exists.return_value = True
        storage.download_object.return_value = content
        cache.is_mutable_file.return_value = False

        resp = client.get(self.ARCHIVE_URL)

        assert resp.headers["X-Artifact-Size"] == str(len(content))

    def test_cache_hit_records_metrics(self, client, patched_deps):
        """A cache hit is recorded exactly once against the ``generic-test`` remote."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"content"
        cache.is_mutable_file.return_value = False

        client.get(self.ARCHIVE_URL)

        patched_deps["metrics"].record_cache_hit.assert_called_once_with("generic-test", ANY)

    def test_cache_hit_records_artifact_mapping(self, client, patched_deps):
        """A cache hit records one artifact mapping in the database."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"content"
        cache.is_mutable_file.return_value = False

        client.get(self.ARCHIVE_URL)

        patched_deps["database"].record_artifact_mapping.assert_called_once()

    def test_cache_hit_rpm_returns_correct_content_type(self, client, patched_deps):
        """A cached ``.rpm`` is served with an ``application/x-rpm`` content type."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"rpm bytes"
        cache.is_mutable_file.return_value = False

        resp = client.get("/api/v1/remote/rpm-test/almalinux/9/x86_64/bash-5.1.8.x86_64.rpm")

        assert resp.status_code == 200
        assert "application/x-rpm" in resp.headers["content-type"]

    def test_cache_hit_xml_returns_correct_content_type(self, client, patched_deps):
        """A cached ``.xml`` file is served with an ``application/xml`` content type."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"<?xml version='1.0'?>"
        cache.is_mutable_file.return_value = False

        resp = client.get("/api/v1/remote/rpm-test/repo/repodata/primary.xml")

        assert resp.status_code == 200
        assert "application/xml" in resp.headers["content-type"]

    def test_cache_miss_fetches_upstream_and_returns_200(self, client, patched_deps):
        """A miss calls ``cache_single_artifact`` once and reports ``X-Artifact-Source: remote``."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = False
        storage.download_object.return_value = b"fresh content"
        cache.is_mutable_file.return_value = False

        fetch_patch = patch(
            self.FETCH_TARGET,
            new_callable=AsyncMock,
            return_value={"status": "cached"},
        )
        with fetch_patch as mock_fetch:
            resp = client.get(self.ARCHIVE_URL)

        mock_fetch.assert_called_once()
        assert resp.status_code == 200
        assert resp.headers["X-Artifact-Source"] == "remote"

    def test_cache_miss_records_metrics(self, client, patched_deps):
        """A cache miss is recorded exactly once against the ``generic-test`` remote."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = False
        storage.download_object.return_value = b"fresh content"
        cache.is_mutable_file.return_value = False

        fetch_patch = patch(
            self.FETCH_TARGET,
            new_callable=AsyncMock,
            return_value={"status": "cached"},
        )
        with fetch_patch:
            client.get(self.ARCHIVE_URL)

        patched_deps["metrics"].record_cache_miss.assert_called_once_with("generic-test", ANY)

    def test_cache_miss_on_index_marks_index_cached(self, client, patched_deps):
        """After a successful fetch of a mutable file, ``mark_index_cached`` is called once."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = False
        storage.download_object.return_value = b"APKINDEX content"
        cache.is_mutable_file.return_value = True

        fetch_patch = patch(
            self.FETCH_TARGET,
            new_callable=AsyncMock,
            return_value={"status": "cached"},
        )
        with fetch_patch:
            resp = client.get(self.APKINDEX_URL)

        assert resp.status_code == 200
        cache.mark_index_cached.assert_called_once()

    def test_upstream_error_returns_502(self, client, patched_deps):
        """An error status from ``cache_single_artifact`` is answered with 502."""
        patched_deps["storage"].exists.return_value = False
        patched_deps["cache"].is_mutable_file.return_value = False

        fetch_patch = patch(
            self.FETCH_TARGET,
            new_callable=AsyncMock,
            return_value={"status": "error", "error": "upstream unreachable"},
        )
        with fetch_patch:
            resp = client.get(self.ARCHIVE_URL)

        assert resp.status_code == 502

    def test_mutable_file_bypasses_immutable_patterns(self, client, patched_deps):
        """Mutable files must be served even when they don't match immutable_patterns."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"APKINDEX content"
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = True

        # APKINDEX.tar.gz does not match alpine-test's immutable_patterns (.*.apk$),
        # but since is_mutable_file returns True it must be allowed through.
        resp = client.get(self.APKINDEX_URL)

        assert resp.status_code == 200

    def test_mutable_unchanged_refreshes_ttl_without_redownload(self, client, patched_deps):
        """When check_mutable_updates=True and upstream says 304, TTL is refreshed in place."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"metadata content"
        # File is mutable and its TTL has expired
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = False
        cache.get_mutable_meta.return_value = {"etag": '"abc"'}

        with patch(self.CHECK_TARGET, new_callable=AsyncMock, return_value=False):
            resp = client.get(self.CHECK_MUTABLE_URL)

        assert resp.status_code == 200
        cache.mark_index_cached.assert_called()
        # S3 object must NOT have been deleted (no re-download)
        storage.client.delete_object.assert_not_called()

    def test_mutable_changed_triggers_redownload(self, client, patched_deps):
        """When upstream reports a change and the re-fetch returns an error, the answer is 502."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = False
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = False
        cache.get_mutable_meta.return_value = {"etag": '"abc"'}

        with patch(self.CHECK_TARGET, new_callable=AsyncMock, return_value=True):
            with patch(
                self.FETCH_TARGET,
                new_callable=AsyncMock,
                return_value={"status": "error", "error": "upstream gone"},
            ):
                resp = client.get(self.CHECK_MUTABLE_URL)

        assert resp.status_code == 502

    def test_mutable_changed_redownloads_successfully(self, client, patched_deps):
        """When check_mutable_updates=True and upstream says 200, fresh copy is fetched and served."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"fresh metadata"
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = False
        cache.get_mutable_meta.return_value = {"etag": '"abc"'}

        with patch(self.CHECK_TARGET, new_callable=AsyncMock, return_value=True):
            with patch(
                self.FETCH_TARGET,
                new_callable=AsyncMock,
                return_value={"status": "cached", "etag": '"def"', "last_modified": None},
            ) as mock_cache:
                resp = client.get(self.CHECK_MUTABLE_URL)

        assert resp.status_code == 200
        mock_cache.assert_called_once()

    def test_mutable_backend_unreachable_on_check_updates_keeps_stale(self, client, patched_deps):
        """When check_mutable_updates=True and backend is unreachable, stale copy is kept and TTL refreshed."""
        from artifactapi.main import UpstreamUnreachable

        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"stale metadata"
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = False
        cache.get_mutable_meta.return_value = {"etag": '"abc"'}

        unreachable = UpstreamUnreachable("connection refused")
        with patch(self.CHECK_TARGET, side_effect=unreachable):
            resp = client.get(self.CHECK_MUTABLE_URL)

        assert resp.status_code == 200
        cache.mark_index_cached.assert_called()
        storage.client.delete_object.assert_not_called()

    def test_mutable_backend_unreachable_on_expiry_keeps_stale(self, client, patched_deps):
        """When a regular mutable file expires and backend is unreachable, stale copy is kept and TTL refreshed."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"stale APKINDEX"
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = False

        with patch(self.REACHABLE_TARGET, new_callable=AsyncMock, return_value=False):
            resp = client.get(self.APKINDEX_URL)

        assert resp.status_code == 200
        cache.mark_index_cached.assert_called()
        storage.client.delete_object.assert_not_called()

    def test_mutable_flag_off_skips_conditional_check(self, client, patched_deps):
        """When check_mutable_updates is not set, expired mutable files are always re-fetched."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = False
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = False

        with patch(self.CHECK_TARGET, new_callable=AsyncMock) as mock_check:
            with patch(
                self.FETCH_TARGET,
                new_callable=AsyncMock,
                return_value={"status": "error", "error": "upstream gone"},
            ):
                client.get("/api/v1/remote/custom-index-test/metadata.json")

        mock_check.assert_not_called()

    def test_local_repo_file_not_found_returns_404(self, client, patched_deps):
        """A local-repo path with no stored metadata is answered with 404."""
        database = patched_deps["database"]
        database.get_local_file_metadata.return_value = None
        database.available = True

        resp = client.get("/api/v1/remote/local-test/path/to/nonexistent.bin")

        assert resp.status_code == 404
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Upload route PUT /api/v1/remote/{remote}/{path}
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestUploadRoute:
    """Tests for the upload route ``PUT /api/v1/remote/{remote}/{path}``."""

    def test_unknown_remote_returns_404(self, client, patched_deps):
        """Uploading to a remote that is not configured is answered with 404."""
        upload = {"file": ("file.tar.gz", b"content", "application/octet-stream")}
        resp = client.put("/api/v1/remote/nonexistent/path/to/file.tar.gz", files=upload)
        assert resp.status_code == 404

    def test_non_local_remote_returns_400(self, client, patched_deps):
        """Uploading to the ``generic-test`` remote is answered with 400."""
        upload = {"file": ("file.tar.gz", b"content", "application/octet-stream")}
        resp = client.put("/api/v1/remote/generic-test/path/to/file.tar.gz", files=upload)
        assert resp.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HEAD route HEAD /api/v1/remote/{remote}/{path}
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHeadRoute:
    """Tests for the HEAD route ``HEAD /api/v1/remote/{remote}/{path}``."""

    def test_non_local_remote_returns_405(self, client, patched_deps):
        """HEAD against the ``generic-test`` remote is answered with 405."""
        resp = client.head("/api/v1/remote/generic-test/path/to/file.tar.gz")
        assert resp.status_code == 405

    def test_local_repo_file_not_found_returns_404(self, client, patched_deps):
        """HEAD on a local-repo path with no stored metadata is answered with 404."""
        database = patched_deps["database"]
        database.get_local_file_metadata.return_value = None
        database.available = True

        resp = client.head("/api/v1/remote/local-test/path/to/nonexistent.bin")
        assert resp.status_code == 404

    def test_unknown_remote_returns_404(self, client, patched_deps):
        """HEAD against a remote that is not configured is answered with 404."""
        resp = client.head("/api/v1/remote/nonexistent/path/to/file.bin")
        assert resp.status_code == 404
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DELETE route DELETE /api/v1/remote/{remote}/{path}
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDeleteRoute:
    """Tests for the delete route ``DELETE /api/v1/remote/{remote}/{path}``."""

    def test_unknown_remote_returns_404(self, client, patched_deps):
        """Deleting from a remote that is not configured is answered with 404."""
        resp = client.delete("/api/v1/remote/nonexistent/path/to/file.tar.gz")
        assert resp.status_code == 404

    def test_non_local_remote_returns_400(self, client, patched_deps):
        """Deleting from the ``generic-test`` remote is answered with 400."""
        resp = client.delete("/api/v1/remote/generic-test/path/to/file.tar.gz")
        assert resp.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cache flush PUT /cache/flush
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCacheFlushEndpoint:
    """Tests for the cache flush endpoint ``PUT /cache/flush``."""

    def test_flush_all_returns_flushed_structure(self, client, patched_deps):
        """The flush response contains ``flushed`` with ``redis_keys`` and ``s3_objects``."""
        patched_deps["cache"].available = False
        patched_deps["storage"].client.list_objects_v2.return_value = {}

        resp = client.put("/cache/flush")

        assert resp.status_code == 200
        payload = resp.json()
        assert "flushed" in payload
        flushed = payload["flushed"]
        assert "redis_keys" in flushed
        assert "s3_objects" in flushed

    def test_flush_specific_remote_echoes_remote(self, client, patched_deps):
        """Flushing with ``?remote=`` echoes the remote name back in the response."""
        patched_deps["cache"].available = False
        patched_deps["storage"].client.list_objects_v2.return_value = {}

        resp = client.put("/cache/flush?remote=alpine-test")

        assert resp.status_code == 200
        assert resp.json()["remote"] == "alpine-test"

    def test_flush_all_deletes_redis_keys_when_cache_available(self, client, patched_deps):
        """With the cache available, the matched keys are deleted in one call and counted."""
        cache = patched_deps["cache"]
        redis_mock = MagicMock()
        # index:* returns keys; mutable:meta:* and metrics:* return nothing
        redis_mock.keys.side_effect = [["index:test:abc", "index:test:def"], [], []]
        cache.available = True
        cache.client = redis_mock
        patched_deps["storage"].client.list_objects_v2.return_value = {}

        resp = client.put("/cache/flush")

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["flushed"]["redis_keys"] == 2
        redis_mock.delete.assert_called_once_with("index:test:abc", "index:test:def")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Metrics endpoint GET /metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMetricsEndpoint:
    """Tests for the metrics endpoint ``GET /metrics``."""

    def test_returns_prometheus_text_by_default(self, client, patched_deps):
        """``/metrics`` answers 200 with a ``text/plain`` content type."""
        resp = client.get("/metrics")

        content_type = resp.headers["content-type"]
        assert resp.status_code == 200
        assert content_type.startswith("text/plain")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config endpoint GET /config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestConfigEndpoint:
    """Tests for the config endpoint ``GET /config``."""

    def test_returns_config_with_remotes(self, client):
        """``/config`` answers 200 and lists ``alpine-test`` under ``remotes``."""
        resp = client.get("/config")
        assert resp.status_code == 200

        payload = resp.json()
        assert "remotes" in payload
        assert "alpine-test" in payload["remotes"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PyPI remote /api/v1/remote/pypi-test/...
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPyPIRemote:
    """Tests for the PyPI remotes under ``/api/v1/remote/pypi-test/...``."""

    SIMPLE_URL = "/api/v1/remote/pypi-test/simple/requests/"

    def test_simple_index_is_mutable(self, client, patched_deps):
        """simple/ paths are detected as mutable (package-type default)."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        html = b"<html><body><a href='https://files.pythonhosted.org/packages/requests-2.31.0.tar.gz'>...</a></body></html>"
        storage.exists.return_value = True
        storage.download_object.return_value = html
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = True

        resp = client.get(self.SIMPLE_URL)

        assert resp.status_code == 200
        cache.mark_index_cached.assert_not_called()

    def test_simple_index_urls_rewritten_to_proxy(self, client, patched_deps):
        """files.pythonhosted.org URLs in a cached simple index are rewritten to our proxy."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        html = b"<html><body><a href='https://files.pythonhosted.org/packages/requests-2.31.0.tar.gz'>...</a></body></html>"
        storage.exists.return_value = True
        storage.download_object.return_value = html
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = True

        resp = client.get(self.SIMPLE_URL)

        assert resp.status_code == 200
        body = resp.content
        assert b"files.pythonhosted.org" not in body
        assert b"/api/v1/remote/pypi-files-test/packages/requests-2.31.0.tar.gz" in body

    def test_simple_index_content_type_is_html(self, client, patched_deps):
        """A cached simple index is served with a ``text/html`` content type."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"<html></html>"
        cache.is_mutable_file.return_value = True
        cache.is_index_valid.return_value = True

        resp = client.get(self.SIMPLE_URL)

        assert resp.status_code == 200
        assert "text/html" in resp.headers["content-type"]

    def test_simple_index_cache_miss_fetches_upstream(self, client, patched_deps):
        """A missing simple index is fetched once and served without pythonhosted URLs."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        html = b"<html><body><a href='https://files.pythonhosted.org/packages/p-1.0.whl'>...</a></body></html>"
        storage.exists.return_value = False
        storage.download_object.return_value = html
        cache.is_mutable_file.return_value = True

        fetch_patch = patch(
            "artifactapi.main.cache_single_artifact",
            new_callable=AsyncMock,
            return_value={"status": "cached"},
        )
        with fetch_patch as mock_fetch:
            resp = client.get(self.SIMPLE_URL)

        mock_fetch.assert_called_once()
        assert resp.status_code == 200
        assert b"files.pythonhosted.org" not in resp.content

    def test_wheel_file_immutable_returns_correct_content_type(self, client, patched_deps):
        """A cached wheel is served as ``application/zip`` with ``X-Artifact-Source: cache``."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"PK wheel bytes"
        cache.is_mutable_file.return_value = False

        resp = client.get("/api/v1/remote/pypi-files-test/packages/requests-2.31.0-py3-none-any.whl")

        assert resp.status_code == 200
        assert "application/zip" in resp.headers["content-type"]
        assert resp.headers["X-Artifact-Source"] == "cache"

    def test_sdist_immutable_returns_correct_content_type(self, client, patched_deps):
        """A cached sdist tarball is served with an ``application/gzip`` content type."""
        storage, cache = patched_deps["storage"], patched_deps["cache"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"tar bytes"
        cache.is_mutable_file.return_value = False

        resp = client.get("/api/v1/remote/pypi-files-test/packages/requests-2.31.0.tar.gz")

        assert resp.status_code == 200
        assert "application/gzip" in resp.headers["content-type"]

    def test_blocked_path_on_files_remote_returns_403(self, client, patched_deps):
        """Paths that don't match immutable_patterns on pypi-files-test are blocked."""
        resp = client.get("/api/v1/remote/pypi-files-test/packages/requests.unknown")
        assert resp.status_code == 403
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# npm remote /api/v1/remote/npm-test/...
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNpmRemote:
|
||||
def test_package_metadata_is_mutable(self, client, patched_deps):
|
||||
"""Top-level package metadata paths are detected as mutable."""
|
||||
deps = patched_deps
|
||||
meta = b'{"name":"express","versions":{}}'
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = meta
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/express")
|
||||
assert response.status_code == 200
|
||||
deps["cache"].mark_index_cached.assert_not_called()
|
||||
|
||||
def test_metadata_tarball_urls_rewritten_to_proxy(self, client, patched_deps):
|
||||
"""registry.npmjs.org tarball URLs in metadata JSON are rewritten to our proxy."""
|
||||
deps = patched_deps
|
||||
meta = b'{"dist":{"tarball":"https://registry.npmjs.org/express/-/express-4.18.2.tgz"}}'
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = meta
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/express")
|
||||
assert response.status_code == 200
|
||||
assert b"registry.npmjs.org" not in response.content
|
||||
assert b"/api/v1/remote/npm-test/express/-/express-4.18.2.tgz" in response.content
|
||||
|
||||
def test_metadata_content_type_is_json(self, client, patched_deps):
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b'{"name":"express"}'
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/express")
|
||||
assert response.status_code == 200
|
||||
assert "application/json" in response.headers["content-type"]
|
||||
|
||||
def test_scoped_package_metadata_rewritten(self, client, patched_deps):
|
||||
"""@scope/package metadata URLs are also rewritten back to the same npm-test remote."""
|
||||
deps = patched_deps
|
||||
meta = b'{"dist":{"tarball":"https://registry.npmjs.org/@babel/core/-/core-7.21.0.tgz"}}'
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = meta
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/@babel/core")
|
||||
assert response.status_code == 200
|
||||
assert b"registry.npmjs.org" not in response.content
|
||||
assert b"/api/v1/remote/npm-test/@babel/core/-/core-7.21.0.tgz" in response.content
|
||||
|
||||
def test_tarball_not_rewritten(self, client, patched_deps):
|
||||
"""Tarball requests (.tgz) bypass URL rewriting and return binary."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"\x1f\x8b tgz bytes"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/express/-/express-4.18.2.tgz")
|
||||
assert response.status_code == 200
|
||||
assert "application/gzip" in response.headers["content-type"]
|
||||
assert response.headers["X-Artifact-Source"] == "cache"
|
||||
|
||||
def test_metadata_cache_miss_fetches_upstream(self, client, patched_deps):
    """A metadata request for an object missing from storage triggers exactly one fetch.

    ``cache_single_artifact`` is replaced by an ``AsyncMock``; the response
    must still be a 200 whose body no longer mentions the upstream registry.
    """
    storage = patched_deps["storage"]
    upstream_meta = b'{"dist":{"tarball":"https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz"}}'

    storage.exists.return_value = False
    storage.download_object.return_value = upstream_meta
    patched_deps["cache"].is_mutable_file.return_value = True

    fetch_patch = patch(
        "artifactapi.main.cache_single_artifact",
        new_callable=AsyncMock,
        return_value={"status": "cached"},
    )
    with fetch_patch as mock_fetch:
        resp = client.get("/api/v1/remote/npm-test/lodash")

    mock_fetch.assert_called_once()
    assert resp.status_code == 200
    assert b"registry.npmjs.org" not in resp.content
|
||||
|
||||
def test_tarball_immutable_allowed_on_npm_remote(self, client, patched_deps):
    """A non-mutable .tgz path on the npm remote is allowed and served as gzip."""
    storage = patched_deps["storage"]
    storage.exists.return_value = True
    storage.download_object.return_value = b"tgz bytes"
    patched_deps["cache"].is_mutable_file.return_value = False

    resp = client.get("/api/v1/remote/npm-test/express/-/express-4.18.2.tgz")

    assert resp.status_code == 200
    content_type = resp.headers["content-type"]
    assert "application/gzip" in content_type
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helm remote /api/v1/remote/helm-test/...
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHelmRemote:
    """Endpoint tests for the helm-test remote: index.yaml handling and chart tarballs."""

    # Minimal index body that references one upstream chart URL.
    _VAULT_INDEX = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n"

    @staticmethod
    def _prime_valid_mutable(deps, payload):
        """Configure the mocks so *payload* is in storage as a mutable file with a valid index."""
        deps["storage"].exists.return_value = True
        deps["storage"].download_object.return_value = payload
        deps["cache"].is_mutable_file.return_value = True
        deps["cache"].is_index_valid.return_value = True

    def test_index_yaml_is_mutable(self, client, patched_deps):
        """A still-valid index.yaml is served without being marked as cached again."""
        self._prime_valid_mutable(patched_deps, self._VAULT_INDEX)

        resp = client.get("/api/v1/remote/helm-test/index.yaml")

        assert resp.status_code == 200
        patched_deps["cache"].mark_index_cached.assert_not_called()

    def test_index_yaml_urls_rewritten_to_proxy(self, client, patched_deps):
        """Upstream chart URLs inside index.yaml are replaced by proxy paths."""
        self._prime_valid_mutable(patched_deps, self._VAULT_INDEX)

        resp = client.get("/api/v1/remote/helm-test/index.yaml")

        assert resp.status_code == 200
        body = resp.content
        assert b"helm.releases.hashicorp.com" not in body
        assert b"/api/v1/remote/helm-test/vault-0.29.1.tgz" in body

    def test_index_yaml_content_type_is_yaml(self, client, patched_deps):
        """index.yaml responses carry a text/yaml content type."""
        self._prime_valid_mutable(patched_deps, b"apiVersion: v1\nentries: {}\n")

        resp = client.get("/api/v1/remote/helm-test/index.yaml")

        assert resp.status_code == 200
        content_type = resp.headers["content-type"]
        assert "text/yaml" in content_type

    def test_chart_tarball_immutable_returns_gzip_content_type(self, client, patched_deps):
        """A versioned chart tarball is served as gzip and reported as a cache hit."""
        storage = patched_deps["storage"]
        storage.exists.return_value = True
        storage.download_object.return_value = b"\x1f\x8b chart bytes"
        patched_deps["cache"].is_mutable_file.return_value = False

        resp = client.get("/api/v1/remote/helm-test/vault-0.29.1.tgz")

        assert resp.status_code == 200
        headers = resp.headers
        assert "application/gzip" in headers["content-type"]
        assert headers["X-Artifact-Source"] == "cache"

    def test_index_yaml_cache_miss_fetches_upstream(self, client, patched_deps):
        """An index.yaml missing from storage triggers exactly one upstream fetch."""
        storage = patched_deps["storage"]
        storage.exists.return_value = False
        storage.download_object.return_value = self._VAULT_INDEX
        patched_deps["cache"].is_mutable_file.return_value = True

        fetch_patch = patch(
            "artifactapi.main.cache_single_artifact",
            new_callable=AsyncMock,
            return_value={"status": "cached"},
        )
        with fetch_patch as mock_fetch:
            resp = client.get("/api/v1/remote/helm-test/index.yaml")

        mock_fetch.assert_called_once()
        assert resp.status_code == 200
        assert b"helm.releases.hashicorp.com" not in resp.content

    def test_non_tgz_non_yaml_path_blocked_by_pattern(self, client, patched_deps):
        """A path that is neither mutable nor an allowed immutable pattern gets a 403."""
        patched_deps["cache"].is_mutable_file.return_value = False

        resp = client.get("/api/v1/remote/helm-test/vault.zip")

        assert resp.status_code == 403
|
||||
@@ -1,132 +0,0 @@
|
||||
"""Tests for S3Storage: get_object_key (pure logic) and I/O methods."""
|
||||
|
||||
import hashlib
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from botocore.exceptions import ClientError
|
||||
from fastapi import HTTPException
|
||||
|
||||
from artifactapi.storage import S3Storage
|
||||
|
||||
|
||||
@pytest.fixture
def storage():
    """Provide an S3Storage whose boto3 client is a MagicMock.

    ``boto3.client`` is patched while the instance is constructed, and the
    instance's ``client`` attribute is then replaced with a fresh mock.
    """
    settings = dict(
        endpoint="localhost:9000",
        access_key="testkey",
        secret_key="testsecret",
        bucket="testbucket",
        secure=False,
    )
    with patch("boto3.client", return_value=MagicMock()):
        instance = S3Storage(**settings)
    instance.client = MagicMock()
    return instance
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_object_key
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetObjectKey:
    """Checks on the key layout produced by ``S3Storage.get_object_key``."""

    def test_key_has_three_part_structure(self, storage):
        # Expected layout: remote / hash-segment / filename
        segments = storage.get_object_key("myremote", "some/path/to/file.rpm").split("/")
        assert len(segments) == 3
        remote, dir_hash, filename = segments
        assert remote == "myremote"
        assert filename == "file.rpm"
        assert len(dir_hash) == 16  # SHA-256 hex truncated to 16 chars

    def test_key_uses_sha256_of_directory_path(self, storage):
        # One assertion pins the hash algorithm, the truncation length and the format.
        expected_hash = hashlib.sha256(b"some/path/to").hexdigest()[:16]
        actual = storage.get_object_key("myremote", "some/path/to/file.rpm")
        assert actual == f"myremote/{expected_hash}/file.rpm"

    def test_different_remotes_give_different_keys(self, storage):
        key_a = storage.get_object_key("remote-a", "path/to/file.rpm")
        key_b = storage.get_object_key("remote-b", "path/to/file.rpm")
        assert key_a != key_b

    def test_different_directories_give_different_keys(self, storage):
        key_v1 = storage.get_object_key("myremote", "path/version-1/file.rpm")
        key_v2 = storage.get_object_key("myremote", "path/version-2/file.rpm")
        assert key_v1 != key_v2
        # Only the directory part differs; the filename segment is shared.
        name_v1 = key_v1.rsplit("/", 1)[-1]
        name_v2 = key_v2.rsplit("/", 1)[-1]
        assert name_v1 == name_v2 == "file.rpm"

    def test_leading_slash_stripped(self, storage):
        with_slash = storage.get_object_key("myremote", "/path/to/file.rpm")
        without_slash = storage.get_object_key("myremote", "path/to/file.rpm")
        assert with_slash == without_slash

    def test_file_with_no_directory(self, storage):
        assert storage.get_object_key("myremote", "file.rpm") == "myremote/file.rpm"

    def test_docker_blob_uses_digest_path(self, storage):
        digest = "a" * 64  # realistic 64-char SHA-256 hex string
        blob_path = f"library/nginx/blobs/sha256:{digest}"
        assert storage.get_object_key("dockerhub", blob_path) == f"dockerhub/blobs/sha256/{digest}"

    def test_docker_blob_deduplication_across_images(self, storage):
        """One blob digest requested through two image names yields a single key."""
        digest = "deadbeef" * 8  # 64-char hex
        via_nginx = storage.get_object_key("dockerhub", f"library/nginx/blobs/sha256:{digest}")
        via_ubuntu = storage.get_object_key("dockerhub", f"library/ubuntu/blobs/sha256:{digest}")
        assert via_nginx == via_ubuntu

    def test_docker_blob_different_digests_different_keys(self, storage):
        key_a = storage.get_object_key("dockerhub", "library/nginx/blobs/sha256:" + "a" * 64)
        key_b = storage.get_object_key("dockerhub", "library/nginx/blobs/sha256:" + "b" * 64)
        assert key_a != key_b

    def test_docker_blob_different_remotes_different_keys(self, storage):
        digest = "abc" * 21 + "d"  # 64-char hex
        on_remote_a = storage.get_object_key("remote-a", f"library/nginx/blobs/sha256:{digest}")
        on_remote_b = storage.get_object_key("remote-b", f"library/nginx/blobs/sha256:{digest}")
        assert on_remote_a != on_remote_b
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_url
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetUrl:
    """Checks on the URL string returned by ``S3Storage.get_url``."""

    def test_returns_http_url_for_insecure_endpoint(self, storage):
        expected = "http://localhost:9000/testbucket/myremote/abc123/file.rpm"
        assert storage.get_url("myremote/abc123/file.rpm") == expected

    def test_returns_http_url_for_secure_storage(self):
        with patch("boto3.client", return_value=MagicMock()):
            secure_storage = S3Storage(endpoint="s3.example.com", access_key="k", secret_key="s", bucket="b", secure=True)
        secure_storage.client = MagicMock()
        # Even with secure=True the expected URL uses http:// (it is the
        # direct internal access address, not the S3 protocol).
        url = secure_storage.get_url("path/to/file.rpm")
        assert url == "http://s3.example.com/b/path/to/file.rpm"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# upload / download_object
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestUpload:
    """Checks on the value returned by ``S3Storage.upload``."""

    def test_upload_returns_s3_uri(self, storage):
        storage.client.put_object.return_value = {}
        uri = storage.upload("myremote/abc123/file.rpm", b"content")
        # The returned URI is expected to combine the bucket name and the object key.
        assert uri == "s3://testbucket/myremote/abc123/file.rpm"
|
||||
|
||||
|
||||
class TestDownloadObject:
    """Checks on error translation in ``S3Storage.download_object``."""

    def test_download_object_raises_404_on_client_error(self, storage):
        # Make the mocked client fail the way a missing key would.
        missing_key = ClientError(
            {"Error": {"Code": "NoSuchKey", "Message": "The specified key does not exist"}},
            "GetObject",
        )
        storage.client.get_object.side_effect = missing_key

        with pytest.raises(HTTPException) as exc_info:
            storage.download_object("nonexistent/key")

        # Checked after the context manager exits so the assertion actually runs.
        assert exc_info.value.status_code == 404
|
||||
Reference in New Issue
Block a user