Compare commits
40 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4789635e87 | |||
| ba52fedd27 | |||
| 76633403b2 | |||
| cae3503ac4 | |||
| 3f098df428 | |||
| 64266f40e9 | |||
| be25fc19f7 | |||
| 3bd3ca8b74 | |||
| 373366e695 | |||
| e6d9b175ce | |||
| 0daca40156 | |||
| 0df726467a | |||
| b8bc7f8714 | |||
| 0c780c1bd1 | |||
| 173b5d8b10 | |||
| 3352a3e886 | |||
| 8adcbac405 | |||
| 4ca89b9159 | |||
| 25b85ddc92 | |||
| d585ab425c | |||
| 6b1a6c9eb4 | |||
| 5de912db75 | |||
| 8e9d313892 | |||
| 70cd439961 | |||
| fe837dabf7 | |||
| 78296dae8f | |||
| 8fe4bac2b9 | |||
| 8bc9285117 | |||
| ce01a94141 | |||
| 4619ae18d8 | |||
| ac51d3a51d | |||
| 2887ce4476 | |||
| 9e52929d73 | |||
| 788d469063 | |||
| 1cbe836f1b | |||
| f3394b9ca6 | |||
| 8da43e610e | |||
| 3a13d76f7e | |||
| 2d0e2c64e6 | |||
| 2414ddfdd3 |
@@ -0,0 +1,15 @@
|
|||||||
|
.git/
|
||||||
|
.venv/
|
||||||
|
dist/
|
||||||
|
tests/
|
||||||
|
remotes.yaml
|
||||||
|
ca-bundle.pem
|
||||||
|
.env
|
||||||
|
*.log
|
||||||
|
docker-compose.yml
|
||||||
|
.woodpecker/
|
||||||
|
.tox/
|
||||||
|
.ruff_cache/
|
||||||
|
.pytest_cache/
|
||||||
|
.pre-commit-cache/
|
||||||
|
minio_data/
|
||||||
+12
-2
@@ -35,7 +35,6 @@ env/
|
|||||||
|
|
||||||
# Environment variables
|
# Environment variables
|
||||||
.env
|
.env
|
||||||
remotes.yaml
|
|
||||||
|
|
||||||
# Logs
|
# Logs
|
||||||
*.log
|
*.log
|
||||||
@@ -43,9 +42,20 @@ remotes.yaml
|
|||||||
# uv
|
# uv
|
||||||
uv.lock
|
uv.lock
|
||||||
|
|
||||||
|
# tox
|
||||||
|
.tox/
|
||||||
|
|
||||||
|
# pytest
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
# pre-commit
|
||||||
|
.pre-commit-cache/
|
||||||
|
|
||||||
|
# ruff
|
||||||
|
.ruff_cache/
|
||||||
|
|
||||||
# Docker volumes
|
# Docker volumes
|
||||||
minio_data/
|
minio_data/
|
||||||
|
|
||||||
# Local configuration overrides
|
# Local configuration overrides
|
||||||
docker-compose.yml
|
|
||||||
ca-bundle.pem
|
ca-bundle.pem
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
repos:
|
||||||
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
|
rev: v0.15.12
|
||||||
|
hooks:
|
||||||
|
- id: ruff
|
||||||
|
args: [--fix, --exit-non-zero-on-fix]
|
||||||
|
- id: ruff-format
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
when:
|
||||||
|
- event: pull_request
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: docker-build
|
||||||
|
image: woodpeckerci/plugin-docker-buildx
|
||||||
|
settings:
|
||||||
|
repo: git.unkin.net/unkin/artifactapi
|
||||||
|
dry_run: true
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
when:
|
||||||
|
- event: tag
|
||||||
|
ref: refs/tags/v*
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: docker
|
||||||
|
image: woodpeckerci/plugin-docker-buildx
|
||||||
|
settings:
|
||||||
|
registry: git.unkin.net
|
||||||
|
repo: git.unkin.net/unkin/artifactapi
|
||||||
|
username: droneci
|
||||||
|
password:
|
||||||
|
from_secret: DRONECI_PASSWORD
|
||||||
|
tags:
|
||||||
|
- ${CI_COMMIT_TAG}
|
||||||
|
- latest
|
||||||
|
build_args:
|
||||||
|
- VERSION=${CI_COMMIT_TAG##v}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
when:
|
||||||
|
- event: pull_request
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: pre-commit
|
||||||
|
image: git.unkin.net/unkin/almalinux9-base:20260308
|
||||||
|
commands:
|
||||||
|
- uvx pre-commit run --all-files
|
||||||
|
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
when:
|
||||||
|
- event: pull_request
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: test
|
||||||
|
image: git.unkin.net/unkin/almalinux9-base:20260308
|
||||||
|
commands:
|
||||||
|
- uvx --python 3.11 --with tox-uv tox
|
||||||
+15
-45
@@ -1,53 +1,23 @@
|
|||||||
# Use Alpine Linux as base image
|
FROM git.unkin.net/unkin/almalinux9-base:latest
|
||||||
FROM python:3.11-alpine
|
|
||||||
|
|
||||||
# Set working directory
|
ARG VERSION=0.0.0.dev0
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Install system dependencies
|
COPY . /build
|
||||||
RUN apk add --no-cache \
|
|
||||||
gcc \
|
|
||||||
musl-dev \
|
|
||||||
libffi-dev \
|
|
||||||
postgresql-dev \
|
|
||||||
curl \
|
|
||||||
wget \
|
|
||||||
tar
|
|
||||||
|
|
||||||
# Install uv
|
RUN HATCH_VCS_PRETEND_VERSION=${VERSION} \
|
||||||
ARG PACKAGE_VERSION=0.9.21
|
SETUPTOOLS_SCM_PRETEND_VERSION=${VERSION} \
|
||||||
RUN wget -O /app/uv-x86_64-unknown-linux-musl.tar.gz https://github.com/astral-sh/uv/releases/download/${PACKAGE_VERSION}/uv-x86_64-unknown-linux-musl.tar.gz && \
|
uv build --wheel --directory /build && \
|
||||||
tar xf /app/uv-x86_64-unknown-linux-musl.tar.gz -C /app && \
|
useradd -m -r -s /bin/sh appuser
|
||||||
mv /app/uv-x86_64-unknown-linux-musl/uv /usr/local/bin/uv && \
|
|
||||||
rm -rf /app/uv-x86_64-unknown-linux-musl* && \
|
|
||||||
chmod +x /usr/local/bin/uv && \
|
|
||||||
uv --version
|
|
||||||
|
|
||||||
# Create non-root user first
|
|
||||||
RUN adduser -D -s /bin/sh appuser && \
|
|
||||||
chown -R appuser:appuser /app
|
|
||||||
|
|
||||||
# Copy dependency files and change ownership
|
|
||||||
COPY --chown=appuser:appuser pyproject.toml uv.lock README.md ./
|
|
||||||
|
|
||||||
# Switch to appuser and install Python dependencies
|
|
||||||
USER appuser
|
USER appuser
|
||||||
ARG VERSION=dev
|
RUN uv tool install --from /build/dist/*.whl artifactapi
|
||||||
ENV HATCH_VCS_PRETEND_VERSION=${VERSION} \
|
|
||||||
SETUPTOOLS_SCM_PRETEND_VERSION=${VERSION}
|
|
||||||
RUN uv sync --frozen
|
|
||||||
|
|
||||||
# Copy application source
|
USER root
|
||||||
COPY --chown=appuser:appuser src/ ./src/
|
RUN rm -rf /build
|
||||||
COPY --chown=appuser:appuser remotes.yaml ./
|
|
||||||
COPY --chown=appuser:appuser ca-bundle.pem ./
|
|
||||||
|
|
||||||
# Expose port
|
|
||||||
EXPOSE 8000
|
EXPOSE 8000
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 CMD curl -f http://localhost:8000/health || exit 1
|
||||||
# Health check
|
USER appuser
|
||||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
ENV PATH="/home/appuser/.local/bin:$PATH"
|
||||||
CMD curl -f http://localhost:8000/health || exit 1
|
WORKDIR /app
|
||||||
|
CMD ["artifactapi"]
|
||||||
# Run the application
|
|
||||||
CMD ["uv", "run", "python", "-m", "src.artifactapi.main"]
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
.PHONY: build install dev clean test lint format docker-build docker-up docker-down docker-logs docker-rebuild docker-clean docker-restart
|
.PHONY: build install dev clean test lint format pre-commit tox docker-build docker-up docker-down docker-logs docker-rebuild docker-clean docker-restart
|
||||||
|
|
||||||
build:
|
build:
|
||||||
docker build --no-cache -t artifactapi:latest .
|
docker build -t artifactapi:dev .
|
||||||
|
|
||||||
install: build
|
install: build
|
||||||
|
|
||||||
@@ -17,7 +17,13 @@ clean:
|
|||||||
rm -rf *.egg-info/
|
rm -rf *.egg-info/
|
||||||
|
|
||||||
test:
|
test:
|
||||||
uv run pytest
|
uvx --python 3.11 --with tox-uv tox
|
||||||
|
|
||||||
|
tox:
|
||||||
|
uvx --python 3.11 --with tox-uv tox
|
||||||
|
|
||||||
|
pre-commit:
|
||||||
|
uvx --python 3.11 pre-commit run --all-files
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
uv run ruff check --fix .
|
uv run ruff check --fix .
|
||||||
@@ -68,4 +74,3 @@ major:
|
|||||||
|
|
||||||
_tag:
|
_tag:
|
||||||
git push origin $(TAG)
|
git push origin $(TAG)
|
||||||
docker-compose build --no-cache --build-arg VERSION=$(TAG:v%=%)
|
|
||||||
|
|||||||
@@ -1,91 +1,146 @@
|
|||||||
# Artifact Storage System
|
# Artifact Storage System
|
||||||
|
|
||||||
A generic FastAPI-based artifact caching system that downloads and stores files from remote sources (GitHub, Gitea, HashiCorp, etc.) in S3-compatible storage with configuration-based access control.
|
FastAPI caching proxy that downloads and stores files from remote sources in S3-compatible storage.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- **Generic Remote Support**: Works with any HTTP-based file server (GitHub, Gitea, HashiCorp, custom servers)
|
- Remote definitions via `remotes.yaml` — generic HTTP, Alpine APK, RPM, Docker, PyPI, npm, Helm
|
||||||
- **Configuration-Based**: YAML configuration for remotes, patterns, and access control
|
- Immutable/mutable caching model with per-remote TTLs
|
||||||
- **Direct URL API**: Access cached files via clean URLs like `/api/github/owner/repo/path/file.tar.gz`
|
- Conditional revalidation (`If-None-Match` / `If-Modified-Since`) on TTL expiry
|
||||||
- **Pattern Filtering**: Regex-based inclusion patterns for security and organization
|
- Stale-on-upstream-error: when the upstream is unreachable, the cached copy is kept and its TTL refreshed rather than evicted
|
||||||
- **Smart Caching**: Automatic download and cache on first access, serve from cache afterward
|
- URL rewriting for PyPI simple index, npm metadata, and Helm `index.yaml`
|
||||||
- **S3 Storage**: MinIO/S3 backend with predictable paths
|
- Access control via regex patterns — unmatched paths return 403
|
||||||
- **Content-Type Detection**: Automatic MIME type detection for downloads
|
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
The system acts as a caching proxy that:
|
```
|
||||||
1. Receives requests via the `/api/{remote}/{path}` endpoint
|
client → /api/v1/remote/{remote}/{path}
|
||||||
2. Checks if the file is already cached
|
↓
|
||||||
3. If not cached, downloads from the configured remote and caches it
|
Redis: mutable TTL check
|
||||||
4. Serves the file with appropriate headers and content types
|
↓ miss / expired
|
||||||
5. Enforces access control via configurable regex patterns
|
S3: object exists?
|
||||||
|
↓ no
|
||||||
## Quick Start
|
upstream remote → S3 + PostgreSQL metadata
|
||||||
|
↓
|
||||||
1. Start MinIO container:
|
response (X-Artifact-Source: cache|remote)
|
||||||
```bash
|
|
||||||
docker-compose up -d
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Create virtual environment and install dependencies:
|
Docker Registry traffic uses the `/v2/{remote}/{path}` endpoint implementing the Docker Registry HTTP API v2.
|
||||||
```bash
|
|
||||||
uv venv
|
### Code layout
|
||||||
source .venv/bin/activate
|
|
||||||
uv pip install -r requirements.txt
|
|
||||||
```
|
```
|
||||||
|
src/artifactapi/
|
||||||
3. Start the API:
|
├── main.py — FastAPI app + thin route declarations only
|
||||||
```bash
|
├── config.py — ConfigManager (loads remotes.yaml)
|
||||||
python main.py
|
├── metrics.py — Prometheus + Redis metrics
|
||||||
```
|
├── docker_auth.py — backwards-compat shim → auth/docker.py
|
||||||
|
├── artifact/ — route handler implementations
|
||||||
4. Access artifacts directly via URL:
|
│ ├── proxy.py — GET /api/v1/remote (remote proxy, cache, revalidation)
|
||||||
```bash
|
│ ├── local.py — PUT/HEAD/DELETE /api/v1/remote (local repos)
|
||||||
# This will download and cache the file on first access
|
│ ├── docker.py — /v2/ Docker Registry v2 proxy
|
||||||
xh GET localhost:8000/api/github/gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64.tar.gz
|
│ ├── discovery.py — /api/v1/artifacts discovery + bulk cache
|
||||||
|
│ └── flush.py — PUT /cache/flush
|
||||||
# Subsequent requests serve from cache (see X-Artifact-Source: cache header)
|
├── auth/
|
||||||
curl -I localhost:8000/api/github/gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64.tar.gz
|
│ ├── __init__.py — re-exports Docker auth helpers
|
||||||
|
│ └── docker.py — Bearer token fetching + in-memory cache
|
||||||
|
├── cache/
|
||||||
|
│ ├── __init__.py — re-exports RedisCache
|
||||||
|
│ └── redis.py — RedisCache (TTL keys, ETag metadata)
|
||||||
|
├── database/
|
||||||
|
│ ├── __init__.py — re-exports DatabaseManager
|
||||||
|
│ └── postgres.py — DatabaseManager (artifact + local-file tables)
|
||||||
|
├── storage/
|
||||||
|
│ ├── __init__.py — re-exports S3Storage
|
||||||
|
│ └── s3.py — S3Storage (MinIO/S3 abstraction)
|
||||||
|
└── remote/
|
||||||
|
├── __init__.py
|
||||||
|
├── base.py — content-type detection
|
||||||
|
├── generic.py — generic HTTP remotes
|
||||||
|
├── helm.py — Helm index.yaml URL rewriting
|
||||||
|
├── npm.py — npm metadata URL rewriting
|
||||||
|
├── python.py — PyPI URL construction + HTML rewriting
|
||||||
|
└── rpm.py — RPM remotes
|
||||||
```
|
```
|
||||||
|
|
||||||
## API Endpoints
|
## API Endpoints
|
||||||
|
|
||||||
### Direct Access
|
| Method | Path | Description |
|
||||||
- `GET /api/{remote}/{path}` - Direct access to artifacts with auto-caching
|
|---|---|---|
|
||||||
|
| `GET` | `/api/v1/remote/{remote}/{path}` | Fetch artifact (auto-cache on miss) |
|
||||||
### Management
|
| `PUT` | `/api/v1/remote/{remote}/{path}` | Upload to local remote |
|
||||||
- `GET /` - API info and available remotes
|
| `HEAD` | `/api/v1/remote/{remote}/{path}` | Check existence (local remotes) |
|
||||||
- `GET /health` - Health check
|
| `DELETE` | `/api/v1/remote/{remote}/{path}` | Delete from local remote |
|
||||||
- `GET /config` - View current configuration
|
| `GET` | `/v2/{remote}/{path}` | Docker Registry v2 proxy |
|
||||||
- `POST /cache-artifact` - Batch cache artifacts matching pattern
|
| `PUT` | `/cache/flush` | Flush cache entries |
|
||||||
- `GET /artifacts/{remote}` - List cached artifacts
|
| `GET` | `/health` | Health check |
|
||||||
|
| `GET` | `/config` | View loaded configuration |
|
||||||
|
| `GET` | `/` | API info and available remotes |
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
The system uses `remotes.yaml` to define remote repositories and access patterns. All other configuration is provided via environment variables.
|
Runtime settings come from environment variables; remote definitions live in one or more YAML files pointed to by `CONFIG_PATH`.
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
| Variable | Description |
|
||||||
|
|---|---|
|
||||||
|
| `CONFIG_PATH` | Path to a config YAML file **or** a directory of YAML files |
|
||||||
|
| `DBHOST`, `DBPORT`, `DBUSER`, `DBPASS`, `DBNAME` | PostgreSQL connection |
|
||||||
|
| `REDIS_URL` | Redis URL (e.g. `redis://localhost:6379`) |
|
||||||
|
| `MINIO_ENDPOINT` | MinIO/S3 endpoint |
|
||||||
|
| `MINIO_ACCESS_KEY` | S3 access key |
|
||||||
|
| `MINIO_SECRET_KEY` | S3 secret key |
|
||||||
|
| `MINIO_BUCKET` | S3 bucket name |
|
||||||
|
| `MINIO_SECURE` | Use HTTPS (`true`/`false`) |
|
||||||
|
|
||||||
|
### Split configuration
|
||||||
|
|
||||||
|
`CONFIG_PATH` accepts three forms:
|
||||||
|
|
||||||
|
**Single file** (original behaviour):
|
||||||
|
```
|
||||||
|
CONFIG_PATH=/etc/artifactapi/remotes.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
**Directory** — all `*.yaml` / `*.yml` files in the directory are loaded in alphabetical order and merged. `remotes` keys are merged across files; later files win on conflict:
|
||||||
|
```
|
||||||
|
CONFIG_PATH=/etc/artifactapi/conf.d/
|
||||||
|
```
|
||||||
|
|
||||||
|
**Main file + `config_dir`** — the main file holds global settings and a `config_dir` pointer; each file in that directory contributes its own `remotes`. Relative `config_dir` paths are resolved relative to the main file:
|
||||||
|
```yaml
|
||||||
|
# /etc/artifactapi/config.yaml
|
||||||
|
config_dir: conf.d # or an absolute path
|
||||||
|
|
||||||
|
# s3/redis/database settings go here (or in env vars)
|
||||||
|
remotes: {} # optional base remotes
|
||||||
|
```
|
||||||
|
|
||||||
### remotes.yaml Structure
|
### remotes.yaml Structure
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
remotes:
|
remotes:
|
||||||
remote-name:
|
remote-name:
|
||||||
base_url: "https://example.com" # Base URL for the remote
|
base_url: "https://example.com"
|
||||||
type: "remote" # Type: "remote" or "local"
|
type: "remote" # "remote" or "local"
|
||||||
package: "generic" # Package type: "generic", "alpine", "rpm"
|
package: "generic" # generic, alpine, rpm, docker, pypi, npm, helm
|
||||||
description: "Human readable description"
|
description: "..."
|
||||||
include_patterns: # Regex patterns for allowed files
|
immutable_patterns: # regex — cached forever
|
||||||
- "pattern1"
|
- ".*\\.tar\\.gz$"
|
||||||
- "pattern2"
|
mutable_patterns: # regex — expire after mutable_ttl
|
||||||
cache: # Cache configuration (optional)
|
- "index\\.yaml$"
|
||||||
file_ttl: 0 # File cache TTL (0 = indefinite)
|
check_mutable_updates: false # send HEAD (If-None-Match) on TTL expiry
|
||||||
index_ttl: 300 # Index file TTL in seconds
|
cache:
|
||||||
|
immutable_ttl: 0 # 0 = indefinitely
|
||||||
|
mutable_ttl: 3600
|
||||||
```
|
```
|
||||||
|
|
||||||
### Remote Types
|
## Remote Types
|
||||||
|
|
||||||
#### Generic Remotes
|
### generic
|
||||||
For general file hosting (GitHub releases, custom servers):
|
|
||||||
|
Arbitrary HTTP file servers — GitHub releases, HashiCorp, custom servers.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
remotes:
|
remotes:
|
||||||
@@ -93,31 +148,28 @@ remotes:
|
|||||||
base_url: "https://github.com"
|
base_url: "https://github.com"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "GitHub releases and files"
|
immutable_patterns:
|
||||||
include_patterns:
|
|
||||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
- "lxc/incus/.*\\.tar\\.gz$"
|
|
||||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0 # Cache files indefinitely
|
immutable_ttl: 0
|
||||||
index_ttl: 0 # No index files for generic remotes
|
|
||||||
|
|
||||||
hashicorp-releases:
|
github-archive:
|
||||||
base_url: "https://releases.hashicorp.com"
|
base_url: "https://github.com"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "HashiCorp product releases"
|
immutable_patterns:
|
||||||
include_patterns:
|
- ".*/archive/refs/tags/.*\\.tar\\.gz$" # tag archives never change
|
||||||
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
mutable_patterns:
|
||||||
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
- ".*/archive/refs/heads/main\\.tar\\.gz$" # branch archives can change
|
||||||
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
check_mutable_updates: true
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 0
|
mutable_ttl: 86400
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Package Repository Remotes
|
Access: `GET /api/v1/remote/github/owner/repo/releases/download/v1.0/binary.tar.gz`
|
||||||
For Linux package repositories with index files:
|
|
||||||
|
### alpine
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
remotes:
|
remotes:
|
||||||
@@ -125,540 +177,224 @@ remotes:
|
|||||||
base_url: "https://dl-cdn.alpinelinux.org"
|
base_url: "https://dl-cdn.alpinelinux.org"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "alpine"
|
package: "alpine"
|
||||||
description: "Alpine Linux APK package repository"
|
immutable_patterns:
|
||||||
include_patterns:
|
- ".*/x86_64/.*\\.apk$"
|
||||||
- ".*/x86_64/.*\\.apk$" # Only x86_64 packages
|
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0 # Cache packages indefinitely
|
immutable_ttl: 0
|
||||||
index_ttl: 7200 # Cache APKINDEX.tar.gz for 2 hours
|
mutable_ttl: 7200
|
||||||
|
```
|
||||||
|
|
||||||
|
`APKINDEX.tar.gz` is a built-in mutable pattern — no `mutable_patterns` entry needed.
|
||||||
|
|
||||||
|
### rpm
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
remotes:
|
||||||
almalinux:
|
almalinux:
|
||||||
base_url: "http://mirror.aarnet.edu.au/pub/almalinux"
|
base_url: "https://mirror.example.com/almalinux"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "rpm"
|
package: "rpm"
|
||||||
description: "AlmaLinux RPM package repository"
|
immutable_patterns:
|
||||||
include_patterns:
|
|
||||||
- ".*/x86_64/.*\\.rpm$"
|
- ".*/x86_64/.*\\.rpm$"
|
||||||
- ".*/noarch/.*\\.rpm$"
|
- ".*/noarch/.*\\.rpm$"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 7200 # Cache metadata files for 2 hours
|
mutable_ttl: 7200
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Local Repositories
|
`repomd.xml` and `repodata/` metadata files are built-in mutable patterns.
|
||||||
For storing custom artifacts:
|
|
||||||
|
### docker
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
remotes:
|
||||||
|
dockerhub:
|
||||||
|
base_url: "https://registry-1.docker.io"
|
||||||
|
type: "remote"
|
||||||
|
package: "docker"
|
||||||
|
# username / password optional for public images
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 300
|
||||||
|
|
||||||
|
ghcr:
|
||||||
|
base_url: "https://ghcr.io"
|
||||||
|
type: "remote"
|
||||||
|
package: "docker"
|
||||||
|
username: "your-github-username"
|
||||||
|
password: "ghp_your_pat" # read:packages scope
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 300
|
||||||
|
```
|
||||||
|
|
||||||
|
Tag manifests and `/tags/list` are built-in mutable patterns. Digest-addressed blobs are immutable.
|
||||||
|
|
||||||
|
For RKE2/containerd, configure `/etc/rancher/rke2/registries.yaml`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
mirrors:
|
||||||
|
docker.io:
|
||||||
|
endpoint:
|
||||||
|
- "https://artifacts.example.com"
|
||||||
|
rewrite:
|
||||||
|
"^(.*)$": "dockerhub/$1"
|
||||||
|
ghcr.io:
|
||||||
|
endpoint:
|
||||||
|
- "https://artifacts.example.com"
|
||||||
|
rewrite:
|
||||||
|
"^(.*)$": "ghcr/$1"
|
||||||
|
```
|
||||||
|
|
||||||
|
### pypi
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
remotes:
|
||||||
|
pypi:
|
||||||
|
base_url: "https://files.pythonhosted.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "pypi"
|
||||||
|
check_mutable_updates: true
|
||||||
|
immutable_patterns:
|
||||||
|
- "packages/.*\\.whl$"
|
||||||
|
- "packages/.*\\.whl\\.metadata$"
|
||||||
|
- "packages/.*\\.tar\\.gz$"
|
||||||
|
- "packages/.*\\.zip$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 600
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Note**: Simple index requests (`/simple/{package}/`) are always fetched from `https://pypi.org`, regardless of `base_url`. This is hardcoded — `base_url` only controls where package files are downloaded from. For self-hosted registries (Gitea, Nexus) where both index and files share the same host, set `base_url` to that host and the override does not apply.
|
||||||
|
|
||||||
|
URLs in simple index HTML are rewritten to route package file downloads back through the same remote.
|
||||||
|
|
||||||
|
Configure uv:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
# /etc/uv/uv.toml or ~/.config/uv/uv.toml
|
||||||
|
[[index]]
|
||||||
|
url = "https://artifacts.example.com/api/v1/remote/pypi/simple"
|
||||||
|
default = true
|
||||||
|
```
|
||||||
|
|
||||||
|
### npm
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
remotes:
|
||||||
|
npm:
|
||||||
|
base_url: "https://registry.npmjs.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "npm"
|
||||||
|
check_mutable_updates: true
|
||||||
|
immutable_patterns:
|
||||||
|
- "\\.tgz$"
|
||||||
|
mutable_patterns:
|
||||||
|
- "^(?!.*\\.tgz$).*"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 600
|
||||||
|
```
|
||||||
|
|
||||||
|
`dist.tarball` URLs in package metadata JSON are rewritten to route tarball downloads back through the same remote.
|
||||||
|
|
||||||
|
Configure npm / yarn / pnpm:
|
||||||
|
|
||||||
|
```ini
|
||||||
|
# .npmrc or ~/.npmrc
|
||||||
|
registry=https://artifacts.example.com/api/v1/remote/npm/
|
||||||
|
```
|
||||||
|
|
||||||
|
### helm
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
remotes:
|
||||||
|
hashicorp-helm:
|
||||||
|
base_url: "https://helm.releases.hashicorp.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "helm"
|
||||||
|
check_mutable_updates: true
|
||||||
|
immutable_patterns:
|
||||||
|
- "\\.tgz$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 3600
|
||||||
|
```
|
||||||
|
|
||||||
|
`index.yaml` is a built-in mutable pattern. Chart URLs inside `index.yaml` are rewritten to route tarball downloads back through the same remote.
|
||||||
|
|
||||||
|
Configure Helm:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
helm repo add hashicorp https://artifacts.example.com/api/v1/remote/hashicorp-helm
|
||||||
|
helm repo update
|
||||||
|
```
|
||||||
|
|
||||||
|
### local
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
remotes:
|
remotes:
|
||||||
local-generic:
|
local-generic:
|
||||||
type: "local"
|
type: "local"
|
||||||
package: "generic"
|
package: "generic"
|
||||||
description: "Local generic file repository"
|
description: "Local file repository"
|
||||||
cache:
|
cache:
|
||||||
file_ttl: 0
|
immutable_ttl: 0
|
||||||
index_ttl: 0
|
mutable_ttl: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
### Include Patterns
|
No `base_url`. Files are uploaded via `PUT` and served via `GET`.
|
||||||
|
|
||||||
Include patterns are regular expressions that control which files can be accessed:
|
## Caching Model
|
||||||
|
|
||||||
|
### Immutable patterns
|
||||||
|
|
||||||
|
Files matching `immutable_patterns` are cached for `immutable_ttl` seconds (0 = indefinitely). Use for versioned release artifacts that never change once published.
|
||||||
|
|
||||||
|
**Access control**: only paths matching an immutable or mutable pattern are served; all others return 403. Omitting `immutable_patterns` entirely allows all paths from that remote.
|
||||||
|
|
||||||
|
### Mutable patterns
|
||||||
|
|
||||||
|
Files matching `mutable_patterns` expire after `mutable_ttl` seconds and are re-fetched on the next request. Mutable files are always served regardless of `immutable_patterns`.
|
||||||
|
|
||||||
|
Each package type has built-in defaults that are merged with any user-defined `mutable_patterns`:
|
||||||
|
|
||||||
|
| Package type | Built-in mutable patterns |
|
||||||
|
|---|---|
|
||||||
|
| `alpine` | `APKINDEX\.tar\.gz$` |
|
||||||
|
| `rpm` | `repomd\.xml$`, `repodata/` metadata variants, `Packages\.gz$` |
|
||||||
|
| `docker` | Tag manifests (non-digest refs), `/tags/list` |
|
||||||
|
| `pypi` | `simple/` (per-package and top-level index pages) |
|
||||||
|
| `helm` | `index\.yaml$` |
|
||||||
|
| `npm` | *(none built-in — define via `mutable_patterns`)* |
|
||||||
|
| `generic` | *(none)* |
|
||||||
|
|
||||||
|
### Conditional revalidation
|
||||||
|
|
||||||
|
Set `check_mutable_updates: true` to send `HEAD` with `If-None-Match` / `If-Modified-Since` on TTL expiry. A 304 response refreshes the TTL without re-downloading. This only applies to user-defined `mutable_patterns` — built-in patterns are always re-fetched unconditionally.
|
||||||
|
|
||||||
|
### Stale-on-upstream-error
|
||||||
|
|
||||||
|
When a mutable file expires and the upstream is unreachable (connection refused, DNS failure, timeout), the cached copy is kept and its TTL refreshed. HTTP error responses (4xx, 5xx) are not treated as network failures and proceed with normal expiry.
|
||||||
|
|
||||||
|
### Quarantine (supply-chain protection)
|
||||||
|
|
||||||
|
Set `quarantine_new: true` and `quarantine_days: N` on a remote to block immutable artifacts published within the last N days. Requests return `404` until the quarantine period expires, giving time to detect malicious packages before they are consumed.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
include_patterns:
|
remotes:
|
||||||
# Specific project patterns
|
pypi:
|
||||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
base_url: "https://files.pythonhosted.org"
|
||||||
|
type: "remote"
|
||||||
# File extension patterns
|
package: "pypi"
|
||||||
- ".*\\.tar\\.gz$"
|
quarantine_new: true
|
||||||
- ".*\\.zip$"
|
quarantine_days: 3 # block packages published in the last 3 days
|
||||||
- ".*\\.rpm$"
|
immutable_patterns:
|
||||||
|
- "packages/.*\\.whl$"
|
||||||
# Architecture-specific patterns
|
- "packages/.*\\.tar\\.gz$"
|
||||||
- ".*/x86_64/.*"
|
cache:
|
||||||
- ".*/linux-amd64/.*"
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 600
|
||||||
# Version-specific patterns
|
|
||||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**Security Note**: Only files matching at least one include pattern are accessible. Files not matching any pattern return HTTP 403.
|
The upstream `Last-Modified` response header is used as a proxy for the publish date. Artifacts that have no `Last-Modified` header are allowed through (fail-open). Mutable files (index pages, tag manifests) are never quarantined.
|
||||||
|
|
||||||
### Cache Configuration
|
|
||||||
|
|
||||||
Control how long different file types are cached:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
cache:
|
|
||||||
file_ttl: 0 # Regular files (0 = cache indefinitely)
|
|
||||||
index_ttl: 300 # Index files like APKINDEX.tar.gz (seconds)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Index Files**: Repository metadata files that change frequently:
|
|
||||||
- Alpine: `APKINDEX.tar.gz`
|
|
||||||
- RPM: `repomd.xml`, `*-primary.xml.gz`, etc.
|
|
||||||
- These are automatically detected and use `index_ttl`
|
|
||||||
|
|
||||||
### Environment Variables
|
|
||||||
|
|
||||||
All runtime configuration comes from environment variables:
|
|
||||||
|
|
||||||
**Database Configuration:**
|
|
||||||
- `DBHOST` - PostgreSQL host
|
|
||||||
- `DBPORT` - PostgreSQL port
|
|
||||||
- `DBUSER` - PostgreSQL username
|
|
||||||
- `DBPASS` - PostgreSQL password
|
|
||||||
- `DBNAME` - PostgreSQL database name
|
|
||||||
|
|
||||||
**Redis Configuration:**
|
|
||||||
- `REDIS_URL` - Redis connection URL (e.g., `redis://localhost:6379`)
|
|
||||||
|
|
||||||
**S3/MinIO Configuration:**
|
|
||||||
- `MINIO_ENDPOINT` - MinIO/S3 endpoint
|
|
||||||
- `MINIO_ACCESS_KEY` - S3 access key
|
|
||||||
- `MINIO_SECRET_KEY` - S3 secret key
|
|
||||||
- `MINIO_BUCKET` - S3 bucket name
|
|
||||||
- `MINIO_SECURE` - Use HTTPS (`true`/`false`)
|
|
||||||
|
|
||||||
## Usage Examples
|
|
||||||
|
|
||||||
### Direct File Access
|
|
||||||
```bash
|
|
||||||
# Access GitHub releases
|
|
||||||
curl localhost:8000/api/github/gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64.tar.gz
|
|
||||||
|
|
||||||
# Access HashiCorp releases (when configured)
|
|
||||||
curl localhost:8000/api/hashicorp/terraform/1.6.0/terraform_1.6.0_linux_amd64.zip
|
|
||||||
|
|
||||||
# Access custom remotes
|
|
||||||
curl localhost:8000/api/custom/path/to/file.tar.gz
|
|
||||||
```
|
|
||||||
|
|
||||||
### Response Headers
|
|
||||||
- `X-Artifact-Source: cache|remote` - Indicates if served from cache or freshly downloaded
|
|
||||||
- `Content-Type` - Automatically detected (application/gzip, application/zip, etc.)
|
|
||||||
- `Content-Disposition` - Download filename
|
|
||||||
- `Content-Length` - File size
|
|
||||||
|
|
||||||
### Pattern Enforcement
|
|
||||||
Access is controlled by regex patterns in the configuration. Requests for files not matching any pattern return HTTP 403.
|
|
||||||
|
|
||||||
## Storage Path Format
|
|
||||||
|
|
||||||
Files are stored with keys like:
|
|
||||||
- `{remote_name}/{path_hash}/{filename}` for direct API access
|
|
||||||
- `{hostname}/{url_hash}/{filename}` for legacy batch operations
|
|
||||||
|
|
||||||
Example: `github/a1b2c3d4e5f6g7h8/terragrunt_linux_amd64.tar.gz`
|
|
||||||
|
|
||||||
## Kubernetes Deployment
|
|
||||||
|
|
||||||
Deploy the artifact storage system to Kubernetes using the following manifests:
|
|
||||||
|
|
||||||
### 1. Namespace
|
|
||||||
```yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Namespace
|
|
||||||
metadata:
|
|
||||||
name: artifact-storage
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. ConfigMap for remotes.yaml
|
|
||||||
```yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: artifactapi-config
|
|
||||||
namespace: artifact-storage
|
|
||||||
data:
|
|
||||||
remotes.yaml: |
|
|
||||||
remotes:
|
|
||||||
github:
|
|
||||||
base_url: "https://github.com"
|
|
||||||
type: "remote"
|
|
||||||
package: "generic"
|
|
||||||
description: "GitHub releases and files"
|
|
||||||
include_patterns:
|
|
||||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
|
||||||
- "lxc/incus/.*\\.tar\\.gz$"
|
|
||||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
|
||||||
cache:
|
|
||||||
file_ttl: 0
|
|
||||||
index_ttl: 0
|
|
||||||
|
|
||||||
hashicorp-releases:
|
|
||||||
base_url: "https://releases.hashicorp.com"
|
|
||||||
type: "remote"
|
|
||||||
package: "generic"
|
|
||||||
description: "HashiCorp product releases"
|
|
||||||
include_patterns:
|
|
||||||
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
|
||||||
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
|
||||||
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
|
||||||
cache:
|
|
||||||
file_ttl: 0
|
|
||||||
index_ttl: 0
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Secret for Environment Variables
|
|
||||||
```yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Secret
|
|
||||||
metadata:
|
|
||||||
name: artifactapi-secret
|
|
||||||
namespace: artifact-storage
|
|
||||||
type: Opaque
|
|
||||||
stringData:
|
|
||||||
DBHOST: "postgres-service"
|
|
||||||
DBPORT: "5432"
|
|
||||||
DBUSER: "artifacts"
|
|
||||||
DBPASS: "artifacts123"
|
|
||||||
DBNAME: "artifacts"
|
|
||||||
REDIS_URL: "redis://redis-service:6379"
|
|
||||||
MINIO_ENDPOINT: "minio-service:9000"
|
|
||||||
MINIO_ACCESS_KEY: "minioadmin"
|
|
||||||
MINIO_SECRET_KEY: "minioadmin"
|
|
||||||
MINIO_BUCKET: "artifacts"
|
|
||||||
MINIO_SECURE: "false"
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4. PostgreSQL Deployment
|
|
||||||
```yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: postgres
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: postgres
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: postgres
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: postgres
|
|
||||||
image: postgres:15-alpine
|
|
||||||
env:
|
|
||||||
- name: POSTGRES_DB
|
|
||||||
value: artifacts
|
|
||||||
- name: POSTGRES_USER
|
|
||||||
value: artifacts
|
|
||||||
- name: POSTGRES_PASSWORD
|
|
||||||
value: artifacts123
|
|
||||||
ports:
|
|
||||||
- containerPort: 5432
|
|
||||||
volumeMounts:
|
|
||||||
- name: postgres-storage
|
|
||||||
mountPath: /var/lib/postgresql/data
|
|
||||||
livenessProbe:
|
|
||||||
exec:
|
|
||||||
command: ["pg_isready", "-U", "artifacts", "-d", "artifacts"]
|
|
||||||
initialDelaySeconds: 30
|
|
||||||
periodSeconds: 30
|
|
||||||
volumes:
|
|
||||||
- name: postgres-storage
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: postgres-pvc
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: postgres-service
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: postgres
|
|
||||||
ports:
|
|
||||||
- port: 5432
|
|
||||||
targetPort: 5432
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: PersistentVolumeClaim
|
|
||||||
metadata:
|
|
||||||
name: postgres-pvc
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteOnce
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: 10Gi
|
|
||||||
```
|
|
||||||
|
|
||||||
### 5. Redis Deployment
|
|
||||||
```yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: redis
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: redis
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: redis
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: redis
|
|
||||||
image: redis:7-alpine
|
|
||||||
command: ["redis-server", "--save", "20", "1"]
|
|
||||||
ports:
|
|
||||||
- containerPort: 6379
|
|
||||||
volumeMounts:
|
|
||||||
- name: redis-storage
|
|
||||||
mountPath: /data
|
|
||||||
livenessProbe:
|
|
||||||
exec:
|
|
||||||
command: ["redis-cli", "ping"]
|
|
||||||
initialDelaySeconds: 30
|
|
||||||
periodSeconds: 30
|
|
||||||
volumes:
|
|
||||||
- name: redis-storage
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: redis-pvc
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: redis-service
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: redis
|
|
||||||
ports:
|
|
||||||
- port: 6379
|
|
||||||
targetPort: 6379
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: PersistentVolumeClaim
|
|
||||||
metadata:
|
|
||||||
name: redis-pvc
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteOnce
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: 5Gi
|
|
||||||
```
|
|
||||||
|
|
||||||
### 6. MinIO Deployment
|
|
||||||
```yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: minio
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: minio
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: minio
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: minio
|
|
||||||
image: minio/minio:latest
|
|
||||||
command: ["minio", "server", "/data", "--console-address", ":9001"]
|
|
||||||
env:
|
|
||||||
- name: MINIO_ROOT_USER
|
|
||||||
value: minioadmin
|
|
||||||
- name: MINIO_ROOT_PASSWORD
|
|
||||||
value: minioadmin
|
|
||||||
ports:
|
|
||||||
- containerPort: 9000
|
|
||||||
- containerPort: 9001
|
|
||||||
volumeMounts:
|
|
||||||
- name: minio-storage
|
|
||||||
mountPath: /data
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /minio/health/live
|
|
||||||
port: 9000
|
|
||||||
initialDelaySeconds: 30
|
|
||||||
periodSeconds: 30
|
|
||||||
volumes:
|
|
||||||
- name: minio-storage
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: minio-pvc
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: minio-service
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: minio
|
|
||||||
ports:
|
|
||||||
- name: api
|
|
||||||
port: 9000
|
|
||||||
targetPort: 9000
|
|
||||||
- name: console
|
|
||||||
port: 9001
|
|
||||||
targetPort: 9001
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: PersistentVolumeClaim
|
|
||||||
metadata:
|
|
||||||
name: minio-pvc
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteOnce
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: 50Gi
|
|
||||||
```
|
|
||||||
|
|
||||||
### 7. Artifact API Deployment
|
|
||||||
```yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: artifactapi
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
replicas: 2
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: artifactapi
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: artifactapi
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: artifactapi
|
|
||||||
image: artifactapi:latest
|
|
||||||
ports:
|
|
||||||
- containerPort: 8000
|
|
||||||
envFrom:
|
|
||||||
- secretRef:
|
|
||||||
name: artifactapi-secret
|
|
||||||
volumeMounts:
|
|
||||||
- name: config-volume
|
|
||||||
mountPath: /app/remotes.yaml
|
|
||||||
subPath: remotes.yaml
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health
|
|
||||||
port: 8000
|
|
||||||
initialDelaySeconds: 30
|
|
||||||
periodSeconds: 30
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health
|
|
||||||
port: 8000
|
|
||||||
initialDelaySeconds: 10
|
|
||||||
periodSeconds: 5
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
memory: "256Mi"
|
|
||||||
cpu: "250m"
|
|
||||||
limits:
|
|
||||||
memory: "512Mi"
|
|
||||||
cpu: "500m"
|
|
||||||
volumes:
|
|
||||||
- name: config-volume
|
|
||||||
configMap:
|
|
||||||
name: artifactapi-config
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: artifactapi-service
|
|
||||||
namespace: artifact-storage
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: artifactapi
|
|
||||||
ports:
|
|
||||||
- port: 8000
|
|
||||||
targetPort: 8000
|
|
||||||
type: ClusterIP
|
|
||||||
```
|
|
||||||
|
|
||||||
### 8. Ingress (Optional)
|
|
||||||
```yaml
|
|
||||||
apiVersion: networking.k8s.io/v1
|
|
||||||
kind: Ingress
|
|
||||||
metadata:
|
|
||||||
name: artifactapi-ingress
|
|
||||||
namespace: artifact-storage
|
|
||||||
annotations:
|
|
||||||
nginx.ingress.kubernetes.io/rewrite-target: /
|
|
||||||
nginx.ingress.kubernetes.io/proxy-body-size: "10g"
|
|
||||||
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
|
|
||||||
spec:
|
|
||||||
rules:
|
|
||||||
- host: artifacts.example.com
|
|
||||||
http:
|
|
||||||
paths:
|
|
||||||
- path: /
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: artifactapi-service
|
|
||||||
port:
|
|
||||||
number: 8000
|
|
||||||
```
|
|
||||||
|
|
||||||
### Deployment Commands
|
|
||||||
```bash
|
|
||||||
# Create namespace
|
|
||||||
kubectl apply -f namespace.yaml
|
|
||||||
|
|
||||||
# Deploy PostgreSQL, Redis, and MinIO
|
|
||||||
kubectl apply -f postgres.yaml
|
|
||||||
kubectl apply -f redis.yaml
|
|
||||||
kubectl apply -f minio.yaml
|
|
||||||
|
|
||||||
# Wait for PostgreSQL, Redis, and MinIO to be ready
|
|
||||||
kubectl wait --for=condition=ready pod -l app=postgres -n artifact-storage --timeout=300s
|
|
||||||
kubectl wait --for=condition=ready pod -l app=redis -n artifact-storage --timeout=300s
|
|
||||||
kubectl wait --for=condition=ready pod -l app=minio -n artifact-storage --timeout=300s
|
|
||||||
|
|
||||||
# Deploy configuration and application
|
|
||||||
kubectl apply -f configmap.yaml
|
|
||||||
kubectl apply -f secret.yaml
|
|
||||||
kubectl apply -f artifactapi.yaml
|
|
||||||
|
|
||||||
# Optional: Deploy ingress
|
|
||||||
kubectl apply -f ingress.yaml
|
|
||||||
|
|
||||||
# Check deployment status
|
|
||||||
kubectl get pods -n artifact-storage
|
|
||||||
kubectl logs -f deployment/artifactapi -n artifact-storage
|
|
||||||
```
|
|
||||||
|
|
||||||
### Access the API
|
|
||||||
```bash
|
|
||||||
# Port-forward to access locally
|
|
||||||
kubectl port-forward service/artifactapi-service 8000:8000 -n artifact-storage
|
|
||||||
|
|
||||||
# Test the API
|
|
||||||
curl http://localhost:8000/health
|
|
||||||
curl http://localhost:8000/
|
|
||||||
|
|
||||||
# Access artifacts
|
|
||||||
curl "http://localhost:8000/api/github/gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Notes for Production
|
|
||||||
- Use proper secrets management (e.g., Vault, Sealed Secrets)
|
|
||||||
- Configure resource limits and requests appropriately
|
|
||||||
- Set up monitoring and alerting
|
|
||||||
- Use external managed databases for production workloads
|
|
||||||
- Configure backup strategies for persistent volumes
|
|
||||||
- Set up proper TLS certificates for ingress
|
|
||||||
- Consider using StatefulSets for databases with persistent storage
|
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
remotes:
|
||||||
|
alpine:
|
||||||
|
base_url: "https://dl-cdn.alpinelinux.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "alpine"
|
||||||
|
description: "Alpine Linux APK package repository"
|
||||||
|
immutable_patterns:
|
||||||
|
- ".*/x86_64/.*\\.apk$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 7200
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
remotes:
|
||||||
|
github:
|
||||||
|
base_url: "https://github.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "generic"
|
||||||
|
description: "GitHub releases and files"
|
||||||
|
immutable_patterns:
|
||||||
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
|
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 0
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
remotes:
|
||||||
|
pypi:
|
||||||
|
base_url: "https://files.pythonhosted.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "pypi"
|
||||||
|
description: "Python Package Index"
|
||||||
|
check_mutable_updates: true
|
||||||
|
quarantine_new: true
|
||||||
|
quarantine_days: 3
|
||||||
|
immutable_patterns:
|
||||||
|
- "packages/.*\\.whl$"
|
||||||
|
- "packages/.*\\.whl\\.metadata$"
|
||||||
|
- "packages/.*\\.tar\\.gz$"
|
||||||
|
- "packages/.*\\.zip$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 600
|
||||||
@@ -0,0 +1,91 @@
|
|||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
artifactapi:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
args:
|
||||||
|
- VERSION=2.2.2.dev0
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
volumes:
|
||||||
|
- ./examples/single-file/remotes.yaml:/app/remotes.yaml:ro,z
|
||||||
|
- ./ca-bundle.pem:/app/ca-bundle.pem:ro,z
|
||||||
|
environment:
|
||||||
|
- CONFIG_PATH=/app/remotes.yaml
|
||||||
|
- DBHOST=postgres
|
||||||
|
- DBPORT=5432
|
||||||
|
- DBUSER=artifacts
|
||||||
|
- DBPASS=artifacts123
|
||||||
|
- DBNAME=artifacts
|
||||||
|
- REDIS_URL=redis://redis:6379
|
||||||
|
- MINIO_ENDPOINT=minio:9000
|
||||||
|
- MINIO_ACCESS_KEY=minioadmin
|
||||||
|
- MINIO_SECRET_KEY=minioadmin
|
||||||
|
- MINIO_BUCKET=artifacts
|
||||||
|
- MINIO_SECURE=false
|
||||||
|
- REQUESTS_CA_BUNDLE=/app/ca-bundle.pem
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
redis:
|
||||||
|
condition: service_healthy
|
||||||
|
minio:
|
||||||
|
condition: service_healthy
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
minio:
|
||||||
|
image: minio/minio:latest
|
||||||
|
ports:
|
||||||
|
- "9000:9000"
|
||||||
|
- "9001:9001"
|
||||||
|
environment:
|
||||||
|
MINIO_ROOT_USER: minioadmin
|
||||||
|
MINIO_ROOT_PASSWORD: minioadmin
|
||||||
|
command: server /data --console-address ":9001"
|
||||||
|
volumes:
|
||||||
|
- minio_data:/data
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 20s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
redis:
|
||||||
|
image: redis:7-alpine
|
||||||
|
ports:
|
||||||
|
- "6379:6379"
|
||||||
|
volumes:
|
||||||
|
- redis_data:/data
|
||||||
|
command: redis-server --save 20 1
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
postgres:
|
||||||
|
image: postgres:15-alpine
|
||||||
|
ports:
|
||||||
|
- "5432:5432"
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: artifacts
|
||||||
|
POSTGRES_USER: artifacts
|
||||||
|
POSTGRES_PASSWORD: artifacts123
|
||||||
|
volumes:
|
||||||
|
- postgres_data:/var/lib/postgresql/data
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U artifacts -d artifacts"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
minio_data:
|
||||||
|
redis_data:
|
||||||
|
postgres_data:
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
remotes:
|
||||||
|
alpine:
|
||||||
|
base_url: "https://dl-cdn.alpinelinux.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "alpine"
|
||||||
|
description: "Alpine Linux APK package repository"
|
||||||
|
immutable_patterns:
|
||||||
|
- ".*/x86_64/.*\\.apk$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 7200
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
remotes:
|
||||||
|
github:
|
||||||
|
base_url: "https://github.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "generic"
|
||||||
|
description: "GitHub releases and files"
|
||||||
|
immutable_patterns:
|
||||||
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
|
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 0
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
remotes:
|
||||||
|
pypi:
|
||||||
|
base_url: "https://files.pythonhosted.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "pypi"
|
||||||
|
description: "Python Package Index"
|
||||||
|
check_mutable_updates: true
|
||||||
|
quarantine_new: true
|
||||||
|
quarantine_days: 3
|
||||||
|
immutable_patterns:
|
||||||
|
- "packages/.*\\.whl$"
|
||||||
|
- "packages/.*\\.whl\\.metadata$"
|
||||||
|
- "packages/.*\\.tar\\.gz$"
|
||||||
|
- "packages/.*\\.zip$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 600
|
||||||
@@ -0,0 +1,277 @@
|
|||||||
|
# Example remotes configuration — copy and adapt for your environment.
|
||||||
|
#
|
||||||
|
# immutable_patterns: artifacts cached forever (e.g. release binaries, versioned tags).
|
||||||
|
# mutable_patterns: artifacts that expire after cache.mutable_ttl seconds and are
|
||||||
|
# re-fetched from upstream on next request (e.g. index files,
|
||||||
|
# branch archives). Defaults to the package-type built-ins when
|
||||||
|
# not set (APKINDEX, repomd.xml, Docker manifests, etc.).
|
||||||
|
# cache:
|
||||||
|
# immutable_ttl: TTL for immutable files (0 = forever; rarely needs changing).
|
||||||
|
# mutable_ttl: TTL in seconds for mutable files. Omit to use the default (3600).
|
||||||
|
#
|
||||||
|
# quarantine_new: Set to true to block immutable artifacts published within the last
|
||||||
|
# quarantine_days days. Requests return 404 until the quarantine period
|
||||||
|
# expires. Fails open when the publish date cannot be determined.
|
||||||
|
# quarantine_days: Number of days to quarantine newly published artifacts (requires
|
||||||
|
# quarantine_new: true). The upstream Last-Modified header is used as
|
||||||
|
# the publish date.
|
||||||
|
#
|
||||||
|
# WARNING: this file may contain credentials — do not commit real values.
|
||||||
|
#
|
||||||
|
# Global configuration
|
||||||
|
#s3:
|
||||||
|
# endpoint: "localhost:9000"
|
||||||
|
# access_key: "minioadmin"
|
||||||
|
# secret_key: "minioadmin"
|
||||||
|
# bucket: "artifacts"
|
||||||
|
# secure: false
|
||||||
|
#
|
||||||
|
#redis:
|
||||||
|
# url: "redis://localhost:6379/0"
|
||||||
|
#
|
||||||
|
#database:
|
||||||
|
# url: "postgresql://artifacts:artifacts123@localhost:5432/artifacts"
|
||||||
|
#
|
||||||
|
remotes:
|
||||||
|
github:
|
||||||
|
base_url: "https://github.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "generic"
|
||||||
|
description: "GitHub releases and files"
|
||||||
|
immutable_patterns:
|
||||||
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
|
- "lxc/incus/.*\\.tar\\.gz$"
|
||||||
|
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
|
- "VictoriaMetrics/VictoriaMetrics/.*/vmutils-linux-amd64-.*\\.tar\\.gz$"
|
||||||
|
- "VictoriaMetrics/VictoriaMetrics/.*/victoria-metrics-linux-amd64-.*-cluster\\.tar\\.gz$"
|
||||||
|
- "VictoriaMetrics/VictoriaMetrics/.*/victoria-logs-linux-amd64-.*\\.tar\\.gz$"
|
||||||
|
- "VictoriaMetrics/VictoriaMetrics/.*/vlutils-linux-amd64-.*\\.tar\\.gz$"
|
||||||
|
- "prometheus-community/bind_exporter/.*/bind_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
|
- "prometheus-community/pgbouncer_exporter/.*/pgbouncer_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
|
- "prometheus-community/postgres_exporter/.*/postgres_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
|
- "onedr0p/exportarr/.*/exportarr_.*_linux_amd64\\.tar\\.gz$"
|
||||||
|
- "tynany/frr_exporter/.*/frr_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
|
- "camptocamp/prometheus-puppetdb-exporter/.*/prometheus-puppetdb-exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
|
- "grafana/jsonnet-language-server/.*/jsonnet-language-server_.*_linux_amd64$"
|
||||||
|
- "helmfile/helmfile/.*/helmfile_.*_linux_amd64\\.tar\\.gz$"
|
||||||
|
- "helmfile/vals/.*/vals_.*_linux_amd64\\.tar\\.gz$"
|
||||||
|
- "openbao/openbao-plugins/.*/openbao-plugin-secrets-consul_linux_amd64_.*\\.tar\\.gz$"
|
||||||
|
- "openbao/openbao-plugins/.*/openbao-plugin-secrets-nomad_linux_amd64_.*\\.tar\\.gz$"
|
||||||
|
- "apple/foundationdb/.*/libfdb_c\\.x86_64\\.so$"
|
||||||
|
- "stalwartlabs/stalwart/.*/stalwart-cli-x86_64-unknown-linux-gnu\\.tar\\.gz$"
|
||||||
|
- "stalwartlabs/stalwart/.*/stalwart-foundationdb-x86_64-unknown-linux-gnu\\.tar\\.gz$"
|
||||||
|
- "stalwartlabs/stalwart/.*/stalwart-x86_64-unknown-linux-gnu\\.tar\\.gz$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Files cached indefinitely
|
||||||
|
mutable_ttl: 0
|
||||||
|
|
||||||
|
github-archive:
|
||||||
|
base_url: "https://github.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "generic"
|
||||||
|
description: "GitHub repository archive tarballs"
|
||||||
|
immutable_patterns:
|
||||||
|
# Tag archives are immutable — a tag never changes
|
||||||
|
- ".*/archive/refs/tags/.*\\.tar\\.gz$"
|
||||||
|
mutable_patterns:
|
||||||
|
# Branch archives can change on every push
|
||||||
|
- ".*/archive/refs/heads/main\\.tar\\.gz$"
|
||||||
|
- ".*/archive/refs/heads/master\\.tar\\.gz$"
|
||||||
|
# Before re-downloading an expired branch archive, check whether it has
|
||||||
|
# actually changed (304 Not Modified → just refresh the TTL, no transfer).
|
||||||
|
# Only applies to user-defined mutable_patterns, not package-type defaults.
|
||||||
|
check_mutable_updates: true
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Tag archives cached indefinitely
|
||||||
|
mutable_ttl: 86400 # Branch archives refreshed after 1 day
|
||||||
|
|
||||||
|
gitea-dl:
|
||||||
|
base_url: "https://dl.gitea.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "generic"
|
||||||
|
description: "Gitea download site"
|
||||||
|
immutable_patterns:
|
||||||
|
- "act_runner/.*/act_runner-.*-linux-amd64$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Files cached indefinitely
|
||||||
|
mutable_ttl: 0
|
||||||
|
|
||||||
|
hashicorp-releases:
|
||||||
|
base_url: "https://releases.hashicorp.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "generic"
|
||||||
|
description: "HashiCorp product releases"
|
||||||
|
immutable_patterns:
|
||||||
|
- "terraform/.*terraform_.*_linux_amd64\\.zip$"
|
||||||
|
- "terraform/.*terraform_.*_windows_amd64\\.zip$"
|
||||||
|
- "terraform/.*terraform_.*_darwin_amd64\\.zip$"
|
||||||
|
- "vault/.*vault_.*_linux_amd64\\.zip$"
|
||||||
|
- "vault/.*vault_.*_windows_amd64\\.zip$"
|
||||||
|
- "vault/.*vault_.*_darwin_amd64\\.zip$"
|
||||||
|
- "consul-cni/.*/consul-cni_.*_linux_amd64\\.zip$"
|
||||||
|
- "consul/.*/consul_.*_linux_amd64\\.zip$"
|
||||||
|
- "nomad-autoscaler/.*/nomad-autoscaler_.*_linux_amd64\\.zip$"
|
||||||
|
- "nomad/.*/nomad_.*_linux_amd64\\.zip$"
|
||||||
|
- "packer/.*/packer_.*_linux_amd64\\.zip$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Files cached indefinitely
|
||||||
|
mutable_ttl: 0
|
||||||
|
|
||||||
|
alpine:
|
||||||
|
base_url: "https://dl-cdn.alpinelinux.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "alpine"
|
||||||
|
description: "Alpine Linux APK package repository"
|
||||||
|
immutable_patterns:
|
||||||
|
- ".*/x86_64/.*\\.apk$"
|
||||||
|
# check_mutable_updates not set: APKINDEX.tar.gz is a package-type default
|
||||||
|
# and is always re-fetched on expiry — conditional checks are skipped for
|
||||||
|
# built-in mutable patterns regardless of this flag.
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Files cached indefinitely
|
||||||
|
mutable_ttl: 7200 # Index files (APKINDEX.tar.gz) cached for 2 hours
|
||||||
|
|
||||||
|
almalinux:
|
||||||
|
base_url: "https://gsl-syd.mm.fcix.net/almalinux"
|
||||||
|
type: "remote"
|
||||||
|
package: "rpm"
|
||||||
|
description: "AlmaLinux RPM package repository"
|
||||||
|
immutable_patterns:
|
||||||
|
- ".*/x86_64/.*\\.rpm$"
|
||||||
|
- ".*/noarch/.*\\.rpm$"
|
||||||
|
- ".*/repodata/.*$"
|
||||||
|
- ".*\\.rpm$" # Allow all RPM files
|
||||||
|
# repomd.xml / repodata are package-type defaults — always re-fetched on
|
||||||
|
# expiry. check_mutable_updates would only apply to any custom
|
||||||
|
# mutable_patterns added here.
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Files cached indefinitely
|
||||||
|
mutable_ttl: 7200 # Metadata files cached for 2 hours
|
||||||
|
|
||||||
|
epel:
|
||||||
|
base_url: "http://mirror.aarnet.edu.au/pub/epel"
|
||||||
|
type: "remote"
|
||||||
|
package: "rpm"
|
||||||
|
description: "EPEL (Extra Packages for Enterprise Linux)"
|
||||||
|
immutable_patterns:
|
||||||
|
- "8/Everything/x86_64/.*\\.rpm$"
|
||||||
|
- "9/Everything/x86_64/.*\\.rpm$"
|
||||||
|
- "10/Everything/x86_64/.*\\.rpm$"
|
||||||
|
- ".*/noarch/.*\\.rpm$"
|
||||||
|
- ".*/repodata/.*$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Files cached indefinitely
|
||||||
|
mutable_ttl: 7200 # Metadata files cached for 2 hours
|
||||||
|
|
||||||
|
fedora:
|
||||||
|
base_url: "https://gsl-syd.mm.fcix.net/fedora/linux"
|
||||||
|
type: "remote"
|
||||||
|
package: "rpm"
|
||||||
|
description: "Fedora Linux RPM package repository"
|
||||||
|
immutable_patterns:
|
||||||
|
- "releases/.*/Everything/x86_64/.*\\.rpm$"
|
||||||
|
- "updates/.*/Everything/x86_64/.*\\.rpm$"
|
||||||
|
- "development/.*/Everything/x86_64/.*\\.rpm$"
|
||||||
|
- ".*/noarch/.*\\.rpm$"
|
||||||
|
- "updates/.*/Everything/x86_64/repodata/.*$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Files cached indefinitely
|
||||||
|
mutable_ttl: 300 # Metadata files cached for 5 minutes
|
||||||
|
|
||||||
|
ghcr:
|
||||||
|
base_url: "https://ghcr.io"
|
||||||
|
type: "remote"
|
||||||
|
package: "docker"
|
||||||
|
description: "GitHub Container Registry"
|
||||||
|
# username: "your-github-username"
|
||||||
|
# password: "your-github-pat" # needs read:packages scope
|
||||||
|
# Docker manifest/tag-list patterns are package-type defaults — always
|
||||||
|
# re-fetched on expiry. check_mutable_updates only applies to any custom
|
||||||
|
# mutable_patterns you add (e.g. a metadata endpoint).
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 300
|
||||||
|
|
||||||
|
dockerhub:
|
||||||
|
base_url: "https://registry-1.docker.io"
|
||||||
|
type: "remote"
|
||||||
|
package: "docker"
|
||||||
|
description: "Docker Hub registry"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 300
|
||||||
|
|
||||||
|
pypi:
|
||||||
|
base_url: "https://files.pythonhosted.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "pypi"
|
||||||
|
description: "Python Package Index — simple index and package files via a single remote"
|
||||||
|
# simple/ requests are transparently fetched from pypi.org; package files come from
|
||||||
|
# files.pythonhosted.org (base_url). URLs in the simple index are rewritten to this remote.
|
||||||
|
check_mutable_updates: true
|
||||||
|
# Block packages published within the last 3 days (supply-chain attack mitigation).
|
||||||
|
# Immutable artifacts (wheel/sdist) newer than quarantine_days return 404 until
|
||||||
|
# the window passes. Disable by setting quarantine_new: false or removing both keys.
|
||||||
|
quarantine_new: true
|
||||||
|
quarantine_days: 3
|
||||||
|
immutable_patterns:
|
||||||
|
- "packages/.*\\.whl$"
|
||||||
|
- "packages/.*\\.whl\\.metadata$"
|
||||||
|
- "packages/.*\\.tar\\.gz$"
|
||||||
|
- "packages/.*\\.zip$"
|
||||||
|
- "packages/.*\\.egg$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 600 # Simple index pages refreshed after 10 minutes
|
||||||
|
|
||||||
|
pypi-gitea:
|
||||||
|
base_url: "https://gitea.example.com/api/packages/myorg/pypi"
|
||||||
|
type: "remote"
|
||||||
|
package: "pypi"
|
||||||
|
description: "Private Gitea PyPI registry — simple index and files at the same host"
|
||||||
|
# username: "your-gitea-username"
|
||||||
|
# password: "your-personal-access-token" # needs package:read scope
|
||||||
|
check_mutable_updates: true
|
||||||
|
immutable_patterns:
|
||||||
|
- "files/.*\\.whl$"
|
||||||
|
- "files/.*\\.whl\\.metadata$"
|
||||||
|
- "files/.*\\.tar\\.gz$"
|
||||||
|
- "files/.*\\.zip$"
|
||||||
|
- "files/.*\\.egg$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 600
|
||||||
|
|
||||||
|
npm:
|
||||||
|
base_url: "https://registry.npmjs.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "npm"
|
||||||
|
description: "npm registry — package metadata with tarball URL rewriting"
|
||||||
|
check_mutable_updates: true
|
||||||
|
immutable_patterns:
|
||||||
|
- \.tgz$
|
||||||
|
mutable_patterns:
|
||||||
|
- ^(?!.*\.tgz$).*
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 600 # Package metadata refreshed after 10 minutes
|
||||||
|
|
||||||
|
hashicorp-helm:
|
||||||
|
base_url: "https://helm.releases.hashicorp.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "helm"
|
||||||
|
description: "HashiCorp Helm chart repository (Vault, Consul, Nomad, etc.)"
|
||||||
|
check_mutable_updates: true
|
||||||
|
immutable_patterns:
|
||||||
|
- "\\.tgz$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Chart tarballs are versioned — cache forever
|
||||||
|
mutable_ttl: 3600 # index.yaml refreshed after 1 hour
|
||||||
|
|
||||||
|
local-generic:
|
||||||
|
type: "local"
|
||||||
|
package: "generic"
|
||||||
|
description: "Local generic file repository"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0 # Files cached indefinitely
|
||||||
|
mutable_ttl: 0
|
||||||
+14
-1
@@ -42,5 +42,18 @@ dev = [
|
|||||||
"black>=23.9.0",
|
"black>=23.9.0",
|
||||||
"isort>=5.12.0",
|
"isort>=5.12.0",
|
||||||
"mypy>=1.6.0",
|
"mypy>=1.6.0",
|
||||||
"ruff>=0.1.0",
|
"ruff>=0.4.0",
|
||||||
|
"tox>=4.0.0",
|
||||||
|
"pre-commit>=3.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
asyncio_mode = "auto"
|
||||||
|
testpaths = ["tests"]
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
line-length = 140
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = ["E", "F", "I", "UP"]
|
||||||
|
ignore = ["E501"]
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from . import discovery, docker, flush, local, proxy
|
||||||
|
|
||||||
|
__all__ = ["discovery", "docker", "flush", "local", "proxy"]
|
||||||
@@ -0,0 +1,82 @@
|
|||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from fastapi import HTTPException
|
||||||
|
|
||||||
|
from .proxy import cache_single_artifact
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def _discover_github_releases(remote: str, include_pattern: str) -> list[str]:
    """Collect GitHub release asset URLs for a repository that match a pattern.

    ``remote`` must begin with ``github.com/<owner>/<repo>``. Every ``*`` in
    ``include_pattern`` is expanded to ``.*`` and the result is searched as a
    regular expression against each asset's ``browser_download_url``.

    A single request is made to the repository's releases endpoint; no
    further pages are requested.

    Raises:
        HTTPException: 400 when ``remote`` does not have the expected form,
            or the upstream status code when the releases request does not
            answer with 200.
    """
    parsed_remote = re.match(r"github\.com/([^/]+)/([^/]+)", remote)
    if parsed_remote is None:
        raise HTTPException(status_code=400, detail="Invalid GitHub remote format")
    owner, repo = parsed_remote.groups()

    releases_url = f"https://api.github.com/repos/{owner}/{repo}/releases"
    async with httpx.AsyncClient(follow_redirects=True) as client:
        response = await client.get(releases_url)
        if response.status_code != 200:
            raise HTTPException(status_code=response.status_code, detail=f"Failed to fetch releases: {response.text}")
        releases = response.json()

    # Treat "*" as a wildcard by expanding it to the regex ".*".
    asset_filter = re.compile(include_pattern.replace("*", ".*"))

    matching_urls: list[str] = []
    for release in releases:
        for asset in release.get("assets", []):
            download_url = asset["browser_download_url"]
            if asset_filter.search(download_url):
                matching_urls.append(download_url)
    return matching_urls
|
||||||
|
|
||||||
|
|
||||||
|
async def _discover(remote: str, include_pattern: str) -> list[str]:
    """Route artifact discovery to the handler for ``remote``.

    Only remotes whose string contains ``github.com`` are handled; they are
    delegated to ``_discover_github_releases``.

    Raises:
        HTTPException: 400 when the remote is not a supported kind.
    """
    if "github.com" not in remote:
        raise HTTPException(status_code=400, detail=f"Unsupported remote: {remote}")
    return await _discover_github_releases(remote, include_pattern)
|
||||||
|
|
||||||
|
|
||||||
|
async def cache_artifacts(remote: str, include_pattern: str, storage) -> dict[str, Any]:
    """Discover artifacts on ``remote`` matching ``include_pattern`` and cache each one.

    Args:
        remote: Remote identifier passed to ``_discover``.
        include_pattern: Pattern passed to ``_discover`` to select artifacts.
        storage: Storage backend handed through to ``cache_single_artifact``.

    Returns:
        A dict with ``message``, ``cached_count`` (results whose ``status`` is
        ``"cached"`` or ``"already_cached"``) and ``artifacts`` (the per-URL
        results returned by ``cache_single_artifact``).

    Raises:
        HTTPException: Propagated unchanged when raised by discovery or
            caching (e.g. 400 for an unsupported remote); any other failure
            is reported as a 500 carrying the error text.
    """
    try:
        matching_urls = await _discover(remote, include_pattern)
        if not matching_urls:
            return {"message": "No matching artifacts found", "cached_count": 0, "artifacts": []}

        cached_artifacts = []
        for url in matching_urls:
            # Artifacts are cached one at a time, each with a fresh empty options dict.
            cached_artifacts.append(await cache_single_artifact(url, "", "", storage, {}))

        cached_count = sum(1 for a in cached_artifacts if a["status"] in ("cached", "already_cached"))
        return {
            "message": f"Processed {len(matching_urls)} artifacts, {cached_count} successfully cached",
            "cached_count": cached_count,
            "artifacts": cached_artifacts,
        }
    except HTTPException:
        # Keep deliberate status codes (400, upstream errors) instead of
        # collapsing them into a generic 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||||
|
|
||||||
|
|
||||||
|
async def list_artifacts(remote: str, include_pattern: str, storage) -> dict[str, Any]:
    """Report which discovered artifacts on ``remote`` are already present in storage.

    Args:
        remote: Remote identifier passed to ``_discover`` and used to build
            storage keys.
        include_pattern: Pattern passed to ``_discover`` to select artifacts.
        storage: Storage backend providing ``get_object_key``, ``exists`` and
            ``get_url``.

    Returns:
        A dict with ``remote``, ``pattern``, ``total_found`` (number of
        discovered URLs), ``cached_count`` and ``artifacts`` (one entry with
        ``url``, ``cached_url`` and ``key`` per artifact found in storage).

    Raises:
        HTTPException: Propagated unchanged when raised by discovery (e.g.
            400 for an unsupported remote); any other failure is reported as
            a 500 carrying the error text.
    """
    try:
        matching_urls = await _discover(remote, include_pattern)

        cached_artifacts = []
        for url in matching_urls:
            # The storage key is derived from the path component of the URL only.
            key = storage.get_object_key(remote, urlparse(url).path)
            if storage.exists(key):
                cached_artifacts.append({"url": url, "cached_url": storage.get_url(key), "key": key})

        return {
            "remote": remote,
            "pattern": include_pattern,
            "total_found": len(matching_urls),
            "cached_count": len(cached_artifacts),
            "artifacts": cached_artifacts,
        }
    except HTTPException:
        # Keep deliberate status codes (400, upstream errors) instead of
        # collapsing them into a generic 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||||
@@ -0,0 +1,103 @@
|
|||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
from fastapi import HTTPException, Request, Response
|
||||||
|
|
||||||
|
from . import proxy as _proxy
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def ping() -> Response:
    """Return an empty JSON object carrying the ``registry/2.0`` API-version header."""
    version_header = {"Docker-Distribution-Api-Version": "registry/2.0"}
    return Response(content="{}", media_type="application/json", headers=version_header)
|
||||||
|
|
||||||
|
|
||||||
|
async def proxy(request: Request, remote_name: str, path: str, storage, cache, config, metrics) -> Response:
    """Serve a Docker ``/v2/`` resource of ``remote_name`` through the cache.

    Steps: validate the remote, apply the allow-list patterns, revalidate or
    drop an expired mutable entry, fetch from upstream on a miss, run the
    quarantine check for non-mutable paths, then answer with registry headers.

    Args:
        request: Incoming request; only ``request.method`` is read here.
        remote_name: Name of the configured remote.
        path: Registry path below ``/v2/``.
        storage: Object storage backend.
        cache: Cache holding mutable-entry validity and publish dates.
        config: Configuration provider.
        metrics: Metrics recorder.

    Returns:
        A header-only response for ``HEAD``; otherwise the artifact bytes.

    Raises:
        HTTPException: 404 unknown remote, 400 non-docker remote, 403 path not
            allowed by patterns, 502 upstream fetch failed, 500 stored object
            unreadable. ``_check_quarantine`` may raise as well.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
    if remote_config.get("package") != "docker":
        raise HTTPException(status_code=400, detail=f"Remote '{remote_name}' is not a docker remote")

    patterns = config.get_immutable_patterns(remote_name, "")
    if patterns:
        # A pattern may match either the full path or just its first two segments.
        path_parts = path.split("/")
        image_name = "/".join(path_parts[:2]) if len(path_parts) >= 2 else path
        if not any(re.search(p, path) or re.search(p, image_name) for p in patterns):
            logger.info(f"PATTERN BLOCKED: {remote_name}/{path}")
            raise HTTPException(status_code=403, detail="Image not allowed by configuration patterns")

    base_url = remote_config.get("base_url", "").rstrip("/")
    remote_url = f"{base_url}/v2/{path}"

    object_key = storage.get_object_key(remote_name, path)
    cached_key = object_key if storage.exists(object_key) else None

    is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name))

    if cached_key and is_mutable:
        if not cache.is_index_valid(remote_name, path):
            if not await _proxy.handle_expired_mutable(remote_name, path, remote_url, config, cache, storage):
                cached_key = None

    # Remember where the bytes come from so the right metric is recorded below.
    served_from_cache = cached_key is not None

    if not cached_key:
        logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
        result = await _proxy.cache_single_artifact(remote_url, remote_name, path, storage, remote_config)
        if result["status"] == "error":
            raise HTTPException(status_code=502, detail=f"Failed to fetch: {result['error']}")
        if result["status"] == "cached" and is_mutable:
            cache_config = config.get_cache_config(remote_name)
            mutable_ttl = cache_config.get("mutable_ttl", 3600)
            cache.mark_index_cached(remote_name, path, mutable_ttl)
            logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
            if result.get("etag") or result.get("last_modified"):
                cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
        if not is_mutable:
            published = result.get("last_modified")
            if published:
                cache.store_artifact_published(remote_name, path, published)
            _proxy._check_quarantine(remote_name, published, config)
    elif not is_mutable:
        published = cache.get_artifact_published(remote_name, path)
        if not published:
            published = await _proxy._fetch_last_modified(remote_url, remote_config)
            if published:
                cache.store_artifact_published(remote_name, path, published)
        _proxy._check_quarantine(remote_name, published, config)

    artifact_data = storage.download_object(object_key)
    if artifact_data is None:
        # Fail with a clear error rather than a TypeError from hashing None.
        raise HTTPException(status_code=500, detail="Cached artifact not accessible")

    if "/blobs/" in path:
        content_type = "application/octet-stream"
    else:
        try:
            manifest_json = json.loads(artifact_data)
            content_type = manifest_json.get("mediaType")
            if not content_type:
                # Without an explicit mediaType, a "manifests" key selects the
                # index type; anything else is treated as a single manifest.
                if "manifests" in manifest_json:
                    content_type = "application/vnd.oci.image.index.v1+json"
                else:
                    content_type = "application/vnd.oci.image.manifest.v1+json"
        except Exception:
            content_type = "application/vnd.oci.image.manifest.v1+json"

    digest = f"sha256:{hashlib.sha256(artifact_data).hexdigest()}"
    headers = {
        "Docker-Distribution-Api-Version": "registry/2.0",
        "Docker-Content-Digest": digest,
        "Content-Length": str(len(artifact_data)),
    }

    if request.method == "HEAD":
        return Response(status_code=200, headers=headers, media_type=content_type)

    if served_from_cache:
        metrics.record_cache_hit(remote_name, len(artifact_data))
    else:
        metrics.record_cache_miss(remote_name, len(artifact_data))
    return Response(content=artifact_data, media_type=content_type, headers=headers)
|
||||||
@@ -0,0 +1,66 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
from fastapi import HTTPException
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def handle(remote: str | None, cache_type: str, cache, storage) -> dict:
|
||||||
|
try:
|
||||||
|
result = {"remote": remote, "cache_type": cache_type, "flushed": {"redis_keys": 0, "s3_objects": 0, "operations": []}}
|
||||||
|
|
||||||
|
if cache_type in ["all", "index", "metrics"] and cache.available and cache.client:
|
||||||
|
patterns = []
|
||||||
|
|
||||||
|
if cache_type in ["all", "index"]:
|
||||||
|
if remote:
|
||||||
|
patterns += [f"index:{remote}:*", f"mutable:meta:{remote}:*"]
|
||||||
|
else:
|
||||||
|
patterns += ["index:*", "mutable:meta:*"]
|
||||||
|
|
||||||
|
if cache_type in ["all", "metrics"]:
|
||||||
|
patterns.append(f"metrics:*:{remote}" if remote else "metrics:*")
|
||||||
|
|
||||||
|
for pattern in patterns:
|
||||||
|
keys = cache.client.keys(pattern)
|
||||||
|
if keys:
|
||||||
|
cache.client.delete(*keys)
|
||||||
|
result["flushed"]["redis_keys"] += len(keys)
|
||||||
|
logger.info(f"Cache flush: deleted {len(keys)} Redis keys matching '{pattern}'")
|
||||||
|
|
||||||
|
if result["flushed"]["redis_keys"] > 0:
|
||||||
|
result["flushed"]["operations"].append(f"Deleted {result['flushed']['redis_keys']} Redis keys")
|
||||||
|
|
||||||
|
if cache_type in ["all", "files"]:
|
||||||
|
try:
|
||||||
|
list_params = {"Bucket": storage.bucket}
|
||||||
|
if remote:
|
||||||
|
list_params["Prefix"] = f"{remote}/"
|
||||||
|
|
||||||
|
response = storage.client.list_objects_v2(**list_params)
|
||||||
|
if "Contents" in response:
|
||||||
|
objects_to_delete = [obj["Key"] for obj in response["Contents"]]
|
||||||
|
for key in objects_to_delete:
|
||||||
|
try:
|
||||||
|
storage.client.delete_object(Bucket=storage.bucket, Key=key)
|
||||||
|
result["flushed"]["s3_objects"] += 1
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to delete S3 object {key}: {e}")
|
||||||
|
|
||||||
|
if objects_to_delete:
|
||||||
|
scope = f" for remote '{remote}'" if remote else ""
|
||||||
|
result["flushed"]["operations"].append(f"Deleted {len(objects_to_delete)} S3 objects{scope}")
|
||||||
|
logger.info(f"Cache flush: deleted {len(objects_to_delete)} S3 objects{scope}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
result["flushed"]["operations"].append(f"S3 flush failed: {str(e)}")
|
||||||
|
logger.error(f"Cache flush S3 error: {e}")
|
||||||
|
|
||||||
|
if not result["flushed"]["operations"]:
|
||||||
|
result["flushed"]["operations"].append("No cache entries found to flush")
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Cache flush error: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Cache flush failed: {str(e)}")
|
||||||
@@ -0,0 +1,108 @@
|
|||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from fastapi import HTTPException, Response, UploadFile
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def upload(remote_name: str, path: str, file: UploadFile, storage, database, config) -> JSONResponse:
    """Store an uploaded file in a local repository and record its metadata.

    Args:
        remote_name: Name of the configured repository; must have type ``local``.
        path: Path of the file inside the repository.
        file: The uploaded file.
        storage: Object storage backend (``upload`` / ``delete_object``).
        database: Metadata store (``file_exists`` / ``add_local_file``).
        config: Configuration provider.

    Returns:
        JSON describing the stored file (path, size, SHA-256, content type).

    Raises:
        HTTPException: 404 unknown remote, 400 non-local remote, 409 file
            already exists, 500 on storage or metadata failure.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
    if remote_config.get("type") != "local":
        raise HTTPException(status_code=400, detail="Upload only supported for local repositories")

    try:
        # Reject duplicates before reading the payload into memory.
        if database.file_exists(remote_name, path):
            raise HTTPException(status_code=409, detail="File already exists")

        content = await file.read()
        sha256_sum = hashlib.sha256(content).hexdigest()

        s3_key = f"local/{remote_name}/{path}"
        content_type = file.content_type or "application/octet-stream"

        try:
            storage.upload(s3_key, content)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Upload failed: {e}") from e

        try:
            success = database.add_local_file(
                repository_name=remote_name,
                file_path=path,
                s3_key=s3_key,
                size_bytes=len(content),
                sha256_sum=sha256_sum,
                content_type=content_type,
            )
        except Exception:
            # The object is already in storage; remove it so a failed metadata
            # write does not leave an orphan behind, then report the failure.
            try:
                storage.delete_object(s3_key)
            except Exception as cleanup_error:
                logger.warning(f"Failed to remove orphaned S3 object {s3_key}: {cleanup_error}")
            raise

        if not success:
            storage.delete_object(s3_key)
            raise HTTPException(status_code=500, detail="Failed to save file metadata")

        return JSONResponse(
            {
                "message": "File uploaded successfully",
                "file_path": path,
                "size_bytes": len(content),
                "sha256_sum": sha256_sum,
                "content_type": content_type,
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}") from e
|
||||||
|
|
||||||
|
|
||||||
|
def check_exists(remote_name: str, path: str, database, config) -> Response:
    """Answer with metadata headers for a file stored in a local repository.

    Raises ``HTTPException``: 404 for an unknown remote or missing file, 405
    for a non-local remote, 500 when the metadata lookup fails.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")

    if remote_config.get("type") != "local":
        raise HTTPException(status_code=405, detail="HEAD method only supported for local repositories")

    def _iso_or_blank(stamp) -> str:
        # Missing timestamps are sent as an empty header value.
        return stamp.isoformat() if stamp else ""

    try:
        metadata = database.get_local_file_metadata(remote_name, path)
        if not metadata:
            raise HTTPException(status_code=404, detail="File not found")

        response_headers = {
            "Content-Length": str(metadata["size_bytes"]),
            "Content-Type": metadata.get("content_type", "application/octet-stream"),
            "X-SHA256": metadata["sha256_sum"],
            "X-Created-At": _iso_or_blank(metadata["created_at"]),
            "X-Uploaded-At": _iso_or_blank(metadata["uploaded_at"]),
        }
        return Response(headers=response_headers)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
def delete(remote_name: str, path: str, storage, database, config) -> JSONResponse:
    """Remove a file from a local repository: metadata first, then the stored object.

    A failed object deletion after the metadata row is gone is only logged.
    Raises ``HTTPException``: 404 unknown remote or file, 400 non-local
    remote, 500 on any other failure.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
    if remote_config.get("type") != "local":
        raise HTTPException(status_code=400, detail="Delete only supported for local repositories")

    try:
        # The database call hands back the storage key of the removed row.
        s3_key = database.delete_local_file(remote_name, path)
        if not s3_key:
            raise HTTPException(status_code=404, detail="File not found")

        object_removed = storage.delete_object(s3_key)
        if not object_removed:
            logger.warning(f"Failed to delete S3 object {s3_key} after database removal")

        return JSONResponse({"message": "File deleted successfully"})
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Delete failed: {str(e)}")
|
||||||
@@ -0,0 +1,331 @@
|
|||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from datetime import UTC, datetime, timedelta
|
||||||
|
from email.utils import parsedate_to_datetime
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from fastapi import HTTPException, Request, Response
|
||||||
|
|
||||||
|
from ..auth import get_docker_token_for_response
|
||||||
|
from ..remote import helm as _helm
|
||||||
|
from ..remote import npm as _npm
|
||||||
|
from ..remote import python as _pypi
|
||||||
|
from ..remote.base import get_content_type
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class UpstreamUnreachable(Exception):
    """Signals that contacting the upstream backend failed with a network or timeout error."""
|
||||||
|
|
||||||
|
|
||||||
|
def _check_quarantine(remote_name: str, last_modified_str: str | None, config) -> None:
|
||||||
|
"""Raise HTTP 404 if the artifact is within the per-remote quarantine window.
|
||||||
|
|
||||||
|
Fails open (allows the request) when the publish date cannot be determined.
|
||||||
|
"""
|
||||||
|
enabled, days = config.get_quarantine_config(remote_name)
|
||||||
|
if not enabled or not days:
|
||||||
|
return
|
||||||
|
if not last_modified_str:
|
||||||
|
return # cannot determine age → allow
|
||||||
|
try:
|
||||||
|
publish_date = parsedate_to_datetime(last_modified_str)
|
||||||
|
except Exception:
|
||||||
|
return # unparseable → allow
|
||||||
|
cutoff = datetime.now(UTC) - timedelta(days=days)
|
||||||
|
if publish_date > cutoff:
|
||||||
|
available_on = (publish_date + timedelta(days=days)).date()
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail=(
|
||||||
|
f"Package quarantined: published {publish_date.date()}, available after {available_on} ({days}-day new-release quarantine)"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_last_modified(remote_url: str, remote_cfg: dict) -> str | None:
    """Issue a HEAD request upstream and hand back its ``Last-Modified`` header.

    Any failure while requesting yields ``None``.
    """
    request_headers = _basic_auth_header(remote_cfg)
    try:
        async with httpx.AsyncClient(follow_redirects=True) as http:
            head_response = await http.head(remote_url, headers=request_headers, timeout=10.0)
            return head_response.headers.get("Last-Modified")
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _basic_auth_header(remote_cfg: dict) -> dict[str, str]:
|
||||||
|
username = remote_cfg.get("username")
|
||||||
|
password = remote_cfg.get("password")
|
||||||
|
if username and password:
|
||||||
|
token = base64.b64encode(f"{username}:{password}".encode()).decode()
|
||||||
|
return {"Authorization": f"Basic {token}"}
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_content(
    data: bytes,
    path: str,
    filename: str,
    remote_config: dict,
    request: Request,
    remote_name: str = "",
) -> tuple[bytes, str]:
    """Pick the package-specific content resolver and return ``(body, content_type)``.

    ``pypi``, ``npm`` and ``helm`` remotes delegate to their own
    ``resolve_content``; every other package type returns ``data`` untouched
    with the content type derived from ``filename``.
    """
    package = remote_config.get("package")
    proxy_base = str(request.base_url).rstrip("/")
    base_url = remote_config.get("base_url", "").rstrip("/")

    if package in ("pypi", "npm"):
        # Both resolvers share one signature, including the immutable patterns.
        resolver = _pypi if package == "pypi" else _npm
        immutable = remote_config.get("immutable_patterns", [])
        return resolver.resolve_content(data, path, filename, immutable, base_url, proxy_base, remote_name)

    if package == "helm":
        return _helm.resolve_content(data, path, filename, base_url, proxy_base, remote_name)

    return data, get_content_type(filename)
|
||||||
|
|
||||||
|
|
||||||
|
def construct_url(remote_config: dict, path: str) -> str:
    """Build the upstream URL for ``path`` according to the remote's package type.

    Docker remotes get a ``/v2/`` prefix, pypi remotes delegate to the pypi
    module, everything else is ``<base_url>/<path>``.
    """
    root = remote_config.get("base_url", "").rstrip("/")
    package = remote_config.get("package")

    if package == "docker":
        return f"{root}/v2/{path}"
    if package == "pypi":
        return _pypi.construct_url(root, path)
    return f"{root}/{path}"
|
||||||
|
|
||||||
|
|
||||||
|
async def cache_single_artifact(url: str, remote_name: str, path: str, storage, remote_config: dict) -> dict:
    """Download ``url`` into storage unless its key is already present.

    Returns a result dict whose ``status`` is ``"already_cached"``,
    ``"cached"`` (with size, ETag and Last-Modified of the download) or
    ``"error"`` (with the error text); failures are reported, never raised.
    """
    key = storage.get_object_key(remote_name, path)
    if storage.exists(key):
        logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})")
        return {"url": url, "cached_url": storage.get_url(key), "status": "already_cached"}

    try:
        is_docker = remote_config.get("package") == "docker" or "/v2/" in url
        username = remote_config.get("username")
        password = remote_config.get("password")
        headers = {}

        if not is_docker:
            # Non-docker remotes authenticate up front with Basic credentials.
            if username and password:
                credentials = base64.b64encode(f"{username}:{password}".encode()).decode()
                headers["Authorization"] = "Basic " + credentials
        elif "/manifests/" in url:
            headers["Accept"] = ",".join(
                [
                    "application/vnd.docker.distribution.manifest.v2+json",
                    "application/vnd.oci.image.manifest.v1+json",
                    "application/vnd.oci.image.index.v1+json",
                    "application/vnd.docker.distribution.manifest.list.v2+json",
                ]
            )
        elif "/blobs/" in url:
            headers["Accept"] = "application/octet-stream"

        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.get(url, headers=headers)

            # Docker remotes answer 401 with a challenge; retry once with a token.
            if is_docker and response.status_code == 401:
                challenge = response.headers.get("WWW-Authenticate", "")
                token = await get_docker_token_for_response(challenge, username, password)
                if token:
                    headers["Authorization"] = f"Bearer {token}"
                    response = await client.get(url, headers=headers)

            response.raise_for_status()
            storage.upload(key, response.content)
            logger.info(f"Cache ADD SUCCESS: {url} (size: {len(response.content)} bytes, key: {key})")

            outcome = {
                "url": url,
                "cached_url": storage.get_url(key),
                "storage_path": f"s3://{storage.bucket}/{key}",
                "size": len(response.content),
                "status": "cached",
                "etag": response.headers.get("ETag"),
                "last_modified": response.headers.get("Last-Modified"),
            }
            return outcome

    except Exception as e:
        return {"url": url, "status": "error", "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _upstream_reachable(url: str, auth_headers: dict | None = None) -> bool:
    """Probe ``url`` with a HEAD request and report whether the upstream answered.

    Only network and timeout errors count as unreachable; any other exception
    is still reported as reachable.
    """
    probe_headers = auth_headers or {}
    try:
        async with httpx.AsyncClient(follow_redirects=True) as http:
            await http.head(url, headers=probe_headers, timeout=10.0)
    except (httpx.NetworkError, httpx.TimeoutException):
        return False
    except Exception:
        return True
    return True
|
||||||
|
|
||||||
|
|
||||||
|
async def check_upstream_changed(remote_url: str, remote_name: str, path: str, cache, auth_headers: dict | None = None) -> bool:
|
||||||
|
meta = cache.get_mutable_meta(remote_name, path)
|
||||||
|
if not meta:
|
||||||
|
return True
|
||||||
|
|
||||||
|
headers = dict(auth_headers or {})
|
||||||
|
if meta.get("etag"):
|
||||||
|
headers["If-None-Match"] = meta["etag"]
|
||||||
|
if meta.get("last_modified"):
|
||||||
|
headers["If-Modified-Since"] = meta["last_modified"]
|
||||||
|
if not (meta.get("etag") or meta.get("last_modified")):
|
||||||
|
return True
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||||
|
response = await client.head(remote_url, headers=headers)
|
||||||
|
return response.status_code != 304
|
||||||
|
except (httpx.NetworkError, httpx.TimeoutException) as exc:
|
||||||
|
raise UpstreamUnreachable(str(exc)) from exc
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_expired_mutable(remote_name: str, path: str, remote_url: str, config, cache, storage) -> bool:
    """Decide what happens to a mutable file whose TTL has run out.

    Returns ``True`` when the cached copy stays in use (its TTL is renewed),
    ``False`` after the entry has been cleaned up for re-download.
    """
    ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
    remote_cfg = config.get_remote_config(remote_name) or {}
    auth = _basic_auth_header(remote_cfg)

    # Revalidation runs only when the remote opts in AND the path matches a
    # user-defined mutable pattern.
    revalidate = remote_cfg.get("check_mutable_updates", False) and cache.is_mutable_file(
        path, config.get_user_mutable_patterns(remote_name)
    )

    if not revalidate:
        reachable = await _upstream_reachable(remote_url, auth)
        if not reachable:
            cache.mark_index_cached(remote_name, path, ttl)
            logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({ttl}s)")
            return True
        logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")
    else:
        try:
            changed = await check_upstream_changed(remote_url, remote_name, path, cache, auth)
        except UpstreamUnreachable:
            cache.mark_index_cached(remote_name, path, ttl)
            logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({ttl}s)")
            return True
        if not changed:
            cache.mark_index_cached(remote_name, path, ttl)
            logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({ttl}s)")
            return True
        logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")

    cache.cleanup_expired_index(storage, remote_name, path)
    return False
|
||||||
|
|
||||||
|
|
||||||
|
async def handle(request: Request, remote_name: str, path: str, storage, cache, config, database, metrics) -> Response:
    """Serve ``path`` from ``remote_name``: local repositories directly, others via the cache.

    For non-local remotes: apply the allow-list patterns (mutable paths are
    exempt), revalidate or drop an expired mutable entry, serve a cache hit,
    otherwise fetch from upstream, store it and serve the stored copy.
    Non-mutable paths pass through the quarantine check on both routes.

    Args:
        request: Incoming request, forwarded to ``_resolve_content``.
        remote_name: Name of the configured remote.
        path: Requested path below the remote.
        storage: Object storage backend.
        cache: Cache holding mutable-entry validity and publish dates.
        config: Configuration provider.
        database: Metadata store for local files and artifact mappings.
        metrics: Metrics recorder.

    Raises:
        HTTPException: 404 unknown remote or local file, 403 path not allowed,
            500 missing ``base_url`` or error while serving, 502 upstream
            fetch failed. ``_check_quarantine`` may raise as well.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")

    if remote_config.get("type") == "local":
        metadata = database.get_local_file_metadata(remote_name, path)
        if not metadata:
            raise HTTPException(status_code=404, detail="File not found")
        content = storage.download_object(metadata["s3_key"])
        if content is None:
            raise HTTPException(status_code=500, detail="File not accessible")
        return Response(
            content=content,
            media_type=metadata.get("content_type", "application/octet-stream"),
            headers={"Content-Disposition": f"attachment; filename={os.path.basename(path)}"},
        )

    # The first two path segments identify the repository; the rest is the file.
    path_parts = path.split("/")
    if len(path_parts) >= 2:
        repo_path = f"{path_parts[0]}/{path_parts[1]}"
        file_path = "/".join(path_parts[2:])
    else:
        repo_path = path
        file_path = path

    mutable_patterns = config.get_mutable_patterns(remote_name)
    if not cache.is_mutable_file(file_path, mutable_patterns) and not cache.is_mutable_file(path, mutable_patterns):
        patterns = config.get_immutable_patterns(remote_name, repo_path)
        if patterns and not any(re.search(p, file_path) or re.search(p, path) for p in patterns):
            logger.info(f"PATTERN BLOCKED: {remote_name}/{path} - not matching include patterns")
            raise HTTPException(status_code=403, detail="Artifact not allowed by configuration patterns")

    # Validate base_url before it is used to build the upstream URL.
    if not remote_config.get("base_url"):
        raise HTTPException(status_code=500, detail=f"No base_url configured for remote '{remote_name}'")
    remote_url = construct_url(remote_config, path)

    cached_key = storage.get_object_key(remote_name, path)
    if not storage.exists(cached_key):
        cached_key = None

    filename = os.path.basename(path)
    is_mutable = cache.is_mutable_file(path, mutable_patterns)

    if cached_key and is_mutable:
        if not cache.is_index_valid(remote_name, path):
            if not await handle_expired_mutable(remote_name, path, remote_url, config, cache, storage):
                cached_key = None

    if cached_key:
        if not is_mutable:
            published = cache.get_artifact_published(remote_name, path)
            if not published:
                published = await _fetch_last_modified(remote_url, remote_config)
                if published:
                    cache.store_artifact_published(remote_name, path, published)
            _check_quarantine(remote_name, published, config)

        try:
            artifact_data = storage.download_object(cached_key)
            artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
            logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})")
            metrics.record_cache_hit(remote_name, len(artifact_data))
            database.record_artifact_mapping(cached_key, remote_name, path, len(artifact_data))
            return Response(
                content=artifact_data,
                media_type=content_type,
                headers={
                    "Content-Disposition": f"attachment; filename={filename}",
                    "X-Artifact-Source": "cache",
                    "X-Artifact-Size": str(len(artifact_data)),
                },
            )
        except HTTPException:
            raise
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error retrieving cached artifact: {str(e)}") from e

    logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
    result = await cache_single_artifact(remote_url, remote_name, path, storage, remote_config)

    if result["status"] == "error":
        logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}")
        raise HTTPException(status_code=502, detail=f"Failed to fetch artifact: {result['error']}")

    if result["status"] == "cached" and is_mutable:
        cache_config = config.get_cache_config(remote_name)
        mutable_ttl = cache_config.get("mutable_ttl", 3600)
        cache.mark_index_cached(remote_name, path, mutable_ttl)
        logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
        if result.get("etag") or result.get("last_modified"):
            cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))

    if not is_mutable:
        published = result.get("last_modified")
        if published:
            cache.store_artifact_published(remote_name, path, published)
        _check_quarantine(remote_name, published, config)

    try:
        cache_key = storage.get_object_key(remote_name, path)
        artifact_data = storage.download_object(cache_key)
        artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
        metrics.record_cache_miss(remote_name, len(artifact_data))
        database.record_artifact_mapping(cache_key, remote_name, path, len(artifact_data))
        return Response(
            content=artifact_data,
            media_type=content_type,
            headers={
                "Content-Disposition": f"attachment; filename={filename}",
                "X-Artifact-Source": "remote",
                "X-Artifact-Size": str(len(artifact_data)),
            },
        )
    except HTTPException:
        # Same as the cache-hit path: keep an HTTP error's own status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error serving artifact: {str(e)}") from e
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .docker import fetch_token, get_docker_token_for_response, parse_www_authenticate
|
||||||
|
|
||||||
|
__all__ = ["fetch_token", "get_docker_token_for_response", "parse_www_authenticate"]
|
||||||
@@ -0,0 +1,96 @@
|
|||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# In-memory token cache: key -> (token, expires_at)
|
||||||
|
_token_cache: dict[str, tuple[str, float]] = {}
|
||||||
|
|
||||||
|
_WWW_AUTH_RE = re.compile(
|
||||||
|
r'Bearer\s+realm="(?P<realm>[^"]+)"'
|
||||||
|
r'(?:,service="(?P<service>[^"]*)")?'
|
||||||
|
r'(?:,scope="(?P<scope>[^"]*)")?',
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_key(realm: str, service: str, scope: str, username: str | None) -> str:
|
||||||
|
return f"{realm}|{service}|{scope}|{username or ''}"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cached_token(key: str) -> str | None:
    """Look up a still-valid token for ``key``; evict the entry and return ``None`` otherwise."""
    cached = _token_cache.get(key)
    if not cached or not (cached[1] > time.time()):
        # Missing or expired: drop whatever is stored under this key.
        _token_cache.pop(key, None)
        return None
    return cached[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _store_token(key: str, token: str, expires_in: int) -> None:
    """Remember ``token`` under ``key`` together with its expiry time."""
    # Shave 30s off the advertised lifetime so a token is never used right as
    # it expires, but always keep it for at least 10s.
    lifetime = max(expires_in - 30, 10)
    _token_cache[key] = (token, time.time() + lifetime)
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_token(
    realm: str,
    service: str,
    scope: str,
    username: str | None = None,
    password: str | None = None,
) -> str | None:
    """Fetch a Bearer token from a Docker registry auth server.

    A valid token already cached for the same realm/service/scope/username is
    reused. Credentials are sent only when both username and password are set.

    Args:
        realm: URL of the token endpoint.
        service: Value for the ``service`` query parameter (omitted if empty).
        scope: Value for the ``scope`` query parameter (omitted if empty).
        username: Optional username for authenticating the token request.
        password: Optional password for authenticating the token request.

    Returns:
        The token, or ``None`` when the request fails or the response does not
        contain a token.
    """
    # The token spec defines 60 seconds as the lifetime when the server omits
    # expires_in, so never assume a longer one.
    default_lifetime = 60

    key = _cache_key(realm, service, scope, username)
    cached = _get_cached_token(key)
    if cached:
        return cached

    params: dict[str, str] = {}
    if service:
        params["service"] = service
    if scope:
        params["scope"] = scope

    auth = (username, password) if username and password else None

    try:
        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.get(realm, params=params, auth=auth)
            response.raise_for_status()
            data = response.json()
    except Exception as e:
        logger.warning(f"Docker token fetch failed ({realm}): {e}")
        return None

    if not isinstance(data, dict):
        logger.warning(f"Docker token response is not a JSON object ({realm})")
        return None

    token = data.get("token") or data.get("access_token")
    if not token:
        # Log only the field names; the body may carry other credentials.
        logger.warning(f"Docker token response missing token field (fields: {sorted(data)})")
        return None

    # expires_in is optional and may be null or malformed; fall back instead
    # of letting int() raise.
    try:
        expires_in = int(data.get("expires_in") or default_lifetime)
    except (TypeError, ValueError):
        expires_in = default_lifetime

    _store_token(key, token, expires_in)
    logger.debug(f"Docker token obtained (realm={realm}, service={service}, scope={scope}, expires_in={expires_in}s)")
    return token
|
||||||
|
|
||||||
|
|
||||||
|
def parse_www_authenticate(header: str) -> tuple[str, str, str] | None:
|
||||||
|
"""Parse WWW-Authenticate: Bearer header. Returns (realm, service, scope) or None."""
|
||||||
|
m = _WWW_AUTH_RE.search(header)
|
||||||
|
if not m:
|
||||||
|
return None
|
||||||
|
return m.group("realm"), m.group("service") or "", m.group("scope") or ""
|
||||||
|
|
||||||
|
|
||||||
|
async def get_docker_token_for_response(
    www_authenticate: str,
    username: str | None = None,
    password: str | None = None,
) -> str | None:
    """Turn a WWW-Authenticate header value into a Bearer token.

    The header is parsed with ``parse_www_authenticate``; when it yields a
    challenge, the token is requested through ``fetch_token`` using the
    optional credentials. Returns ``None`` if the header cannot be parsed or
    the token fetch returns ``None``.
    """
    challenge = parse_www_authenticate(www_authenticate)
    if challenge:
        # challenge is (realm, service, scope), in fetch_token's argument order.
        return await fetch_token(*challenge, username, password)
    return None
|
||||||
@@ -1,91 +0,0 @@
|
|||||||
import time
|
|
||||||
import hashlib
|
|
||||||
import redis
|
|
||||||
|
|
||||||
|
|
||||||
class RedisCache:
|
|
||||||
def __init__(self, redis_url: str):
|
|
||||||
self.redis_url = redis_url
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.client = redis.from_url(self.redis_url, decode_responses=True)
|
|
||||||
# Test connection
|
|
||||||
self.client.ping()
|
|
||||||
self.available = True
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Redis not available: {e}")
|
|
||||||
self.client = None
|
|
||||||
self.available = False
|
|
||||||
|
|
||||||
def is_index_file(self, file_path: str) -> bool:
|
|
||||||
"""Check if the file is an index file that should have TTL"""
|
|
||||||
return (
|
|
||||||
file_path.endswith("APKINDEX.tar.gz")
|
|
||||||
or file_path.endswith("Packages.gz")
|
|
||||||
or file_path.endswith("repomd.xml")
|
|
||||||
or ("repodata/" in file_path
|
|
||||||
and file_path.endswith((
|
|
||||||
".xml", ".xml.gz", ".xml.bz2", ".xml.xz", ".xml.zck", ".xml.zst",
|
|
||||||
".sqlite", ".sqlite.gz", ".sqlite.bz2", ".sqlite.xz", ".sqlite.zck", ".sqlite.zst",
|
|
||||||
".yaml.xz", ".yaml.gz", ".yaml.bz2", ".yaml.zst",
|
|
||||||
".asc", ".txt"
|
|
||||||
)))
|
|
||||||
# Docker tag-based manifests are mutable (index); digest-pinned are immutable (file)
|
|
||||||
or (
|
|
||||||
"/manifests/" in file_path
|
|
||||||
and not file_path.split("/manifests/", 1)[1].startswith("sha256:")
|
|
||||||
)
|
|
||||||
or "/tags/list" in file_path
|
|
||||||
or file_path.endswith("/tags/list")
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_index_cache_key(self, remote_name: str, path: str) -> str:
|
|
||||||
"""Generate cache key for index files"""
|
|
||||||
return f"index:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
|
||||||
|
|
||||||
def is_index_valid(
|
|
||||||
self, remote_name: str, path: str, ttl_override: int = None
|
|
||||||
) -> bool:
|
|
||||||
"""Check if index file is still valid (not expired)"""
|
|
||||||
if not self.available:
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
|
||||||
key = self.get_index_cache_key(remote_name, path)
|
|
||||||
return self.client.exists(key) > 0
|
|
||||||
except Exception:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
|
|
||||||
"""Mark index file as cached with TTL"""
|
|
||||||
if not self.available:
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
key = self.get_index_cache_key(remote_name, path)
|
|
||||||
self.client.setex(key, ttl, str(int(time.time())))
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
|
|
||||||
"""Remove expired index from S3 storage"""
|
|
||||||
if not self.available:
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Construct the URL the same way as in the main flow
|
|
||||||
from .config import ConfigManager
|
|
||||||
import os
|
|
||||||
config_path = os.environ.get("CONFIG_PATH")
|
|
||||||
if config_path:
|
|
||||||
config = ConfigManager(config_path)
|
|
||||||
remote_config = config.get_remote_config(remote_name)
|
|
||||||
if remote_config:
|
|
||||||
base_url = remote_config.get("base_url")
|
|
||||||
if base_url:
|
|
||||||
# Use hierarchical path-based key (same as cache_single_artifact)
|
|
||||||
s3_key = storage.get_object_key(remote_name, path)
|
|
||||||
if storage.exists(s3_key):
|
|
||||||
storage.client.delete_object(Bucket=storage.bucket, Key=s3_key)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
Vendored
+3
@@ -0,0 +1,3 @@
|
|||||||
|
from .redis import RedisCache
|
||||||
|
|
||||||
|
__all__ = ["RedisCache"]
|
||||||
Vendored
+124
@@ -0,0 +1,124 @@
|
|||||||
|
import hashlib
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
import redis
|
||||||
|
|
||||||
|
|
||||||
|
class RedisCache:
|
||||||
|
def __init__(self, redis_url: str):
|
||||||
|
self.redis_url = redis_url
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.client = redis.from_url(self.redis_url, decode_responses=True)
|
||||||
|
self.client.ping()
|
||||||
|
self.available = True
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Redis not available: {e}")
|
||||||
|
self.client = None
|
||||||
|
self.available = False
|
||||||
|
|
||||||
|
def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
|
||||||
|
if patterns is None:
|
||||||
|
patterns = []
|
||||||
|
return any(re.search(p, file_path) for p in patterns)
|
||||||
|
|
||||||
|
def get_index_cache_key(self, remote_name: str, path: str) -> str:
|
||||||
|
return f"index:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
||||||
|
|
||||||
|
def get_mutable_meta_key(self, remote_name: str, path: str) -> str:
|
||||||
|
return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
||||||
|
|
||||||
|
def is_index_valid(self, remote_name: str, path: str) -> bool:
|
||||||
|
if not self.available:
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
key = self.get_index_cache_key(remote_name, path)
|
||||||
|
return self.client.exists(key) > 0
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
|
||||||
|
if not self.available:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
key = self.get_index_cache_key(remote_name, path)
|
||||||
|
self.client.setex(key, ttl, str(int(time.time())))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None:
|
||||||
|
if not self.available:
|
||||||
|
return
|
||||||
|
data = {}
|
||||||
|
if etag:
|
||||||
|
data["etag"] = etag
|
||||||
|
if last_modified:
|
||||||
|
data["last_modified"] = last_modified
|
||||||
|
if not data:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
self.client.hset(self.get_mutable_meta_key(remote_name, path), mapping=data)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_mutable_meta(self, remote_name: str, path: str) -> dict:
|
||||||
|
if not self.available:
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
return self.client.hgetall(self.get_mutable_meta_key(remote_name, path)) or {}
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def delete_mutable_meta(self, remote_name: str, path: str) -> None:
|
||||||
|
if not self.available:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
self.client.delete(self.get_mutable_meta_key(remote_name, path))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_artifact_published_key(self, remote_name: str, path: str) -> str:
|
||||||
|
return f"pkg:published:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
||||||
|
|
||||||
|
def store_artifact_published(self, remote_name: str, path: str, last_modified: str) -> None:
|
||||||
|
"""Persist the upstream Last-Modified header for a (typically immutable) artifact."""
|
||||||
|
if not self.available:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
self.client.set(self.get_artifact_published_key(remote_name, path), last_modified)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_artifact_published(self, remote_name: str, path: str) -> str | None:
|
||||||
|
"""Return the stored Last-Modified string for an artifact, or None."""
|
||||||
|
if not self.available:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return self.client.get(self.get_artifact_published_key(remote_name, path))
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
|
||||||
|
if not self.available:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
|
||||||
|
from ..config import ConfigManager
|
||||||
|
|
||||||
|
config_path = os.environ.get("CONFIG_PATH")
|
||||||
|
if config_path:
|
||||||
|
config = ConfigManager(config_path)
|
||||||
|
remote_config = config.get_remote_config(remote_name)
|
||||||
|
if remote_config:
|
||||||
|
base_url = remote_config.get("base_url")
|
||||||
|
if base_url:
|
||||||
|
s3_key = storage.get_object_key(remote_name, path)
|
||||||
|
if storage.exists(s3_key):
|
||||||
|
storage.client.delete_object(Bucket=storage.bucket, Key=s3_key)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
self.delete_mutable_meta(remote_name, path)
|
||||||
+144
-33
@@ -1,64 +1,139 @@
|
|||||||
import os
|
import glob
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
from typing import Optional
|
|
||||||
|
_PACKAGE_MUTABLE_PATTERNS: dict[str, list[str]] = {
|
||||||
|
"alpine": [
|
||||||
|
r"APKINDEX\.tar\.gz$",
|
||||||
|
],
|
||||||
|
"rpm": [
|
||||||
|
r"repomd\.xml$",
|
||||||
|
r"repodata/.*\.(xml|xml\.gz|xml\.bz2|xml\.xz|xml\.zck|xml\.zst"
|
||||||
|
r"|sqlite|sqlite\.gz|sqlite\.bz2|sqlite\.xz|sqlite\.zck|sqlite\.zst"
|
||||||
|
r"|yaml\.xz|yaml\.gz|yaml\.bz2|yaml\.zst|asc|txt)$",
|
||||||
|
r"Packages\.gz$",
|
||||||
|
],
|
||||||
|
"docker": [
|
||||||
|
r"/manifests/(?!sha256:)[^/]+$",
|
||||||
|
r"/tags/list$",
|
||||||
|
],
|
||||||
|
"pypi": [
|
||||||
|
r"simple/", # Per-package and top-level simple index pages
|
||||||
|
],
|
||||||
|
"npm": [],
|
||||||
|
"helm": [
|
||||||
|
r"index\.yaml$",
|
||||||
|
],
|
||||||
|
"generic": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class ConfigManager:
|
class ConfigManager:
|
||||||
def __init__(self, config_file: str = "remotes.yaml"):
|
def __init__(self, config_path: str = "remotes.yaml"):
|
||||||
self.config_file = config_file
|
self.config_path = config_path
|
||||||
self._last_modified = 0
|
self._config_dir: str | None = None
|
||||||
|
self._last_modified: float = 0.0
|
||||||
self.config = self._load_config()
|
self.config = self._load_config()
|
||||||
|
|
||||||
def _load_config(self) -> dict:
|
def _load_single_file(self, path: str) -> dict:
|
||||||
try:
|
try:
|
||||||
with open(self.config_file, "r") as f:
|
with open(path) as f:
|
||||||
if self.config_file.endswith(".yaml") or self.config_file.endswith(
|
if path.endswith((".yaml", ".yml")):
|
||||||
".yml"
|
return yaml.safe_load(f) or {}
|
||||||
):
|
return json.load(f)
|
||||||
return yaml.safe_load(f)
|
|
||||||
else:
|
|
||||||
return json.load(f)
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _merge(base: dict, overlay: dict) -> dict:
|
||||||
|
result = {**base}
|
||||||
|
for key, value in overlay.items():
|
||||||
|
if key == "remotes" and isinstance(base.get("remotes"), dict) and isinstance(value, dict):
|
||||||
|
result["remotes"] = {**base.get("remotes", {}), **value}
|
||||||
|
else:
|
||||||
|
result[key] = value
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _load_from_dir(self, dir_path: str) -> dict:
|
||||||
|
merged: dict = {}
|
||||||
|
files = sorted(glob.glob(os.path.join(dir_path, "*.yaml")) + glob.glob(os.path.join(dir_path, "*.yml")))
|
||||||
|
for path in files:
|
||||||
|
merged = self._merge(merged, self._load_single_file(path))
|
||||||
|
return merged
|
||||||
|
|
||||||
|
def _load_config(self) -> dict:
|
||||||
|
self._config_dir = None
|
||||||
|
|
||||||
|
if os.path.isdir(self.config_path):
|
||||||
|
return self._load_from_dir(self.config_path) or {"remotes": {}}
|
||||||
|
|
||||||
|
config = self._load_single_file(self.config_path)
|
||||||
|
if not config:
|
||||||
return {"remotes": {}}
|
return {"remotes": {}}
|
||||||
|
|
||||||
def _check_reload(self) -> None:
|
config_dir = config.pop("config_dir", None)
|
||||||
"""Check if config file has been modified and reload if needed"""
|
if config_dir:
|
||||||
try:
|
if not os.path.isabs(config_dir):
|
||||||
import os
|
config_dir = os.path.join(os.path.dirname(os.path.abspath(self.config_path)), config_dir)
|
||||||
|
self._config_dir = config_dir
|
||||||
|
config = self._merge(config, self._load_from_dir(config_dir))
|
||||||
|
|
||||||
current_modified = os.path.getmtime(self.config_file)
|
return config
|
||||||
|
|
||||||
|
def _file_mtimes(self) -> list[float]:
|
||||||
|
mtimes: list[float] = []
|
||||||
|
if os.path.isdir(self.config_path):
|
||||||
|
for f in glob.glob(os.path.join(self.config_path, "*.yaml")) + glob.glob(os.path.join(self.config_path, "*.yml")):
|
||||||
|
try:
|
||||||
|
mtimes.append(os.path.getmtime(f))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
mtimes.append(os.path.getmtime(self.config_path))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if self._config_dir and os.path.isdir(self._config_dir):
|
||||||
|
for f in glob.glob(os.path.join(self._config_dir, "*.yaml")) + glob.glob(os.path.join(self._config_dir, "*.yml")):
|
||||||
|
try:
|
||||||
|
mtimes.append(os.path.getmtime(f))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return mtimes
|
||||||
|
|
||||||
|
def _check_reload(self) -> None:
|
||||||
|
try:
|
||||||
|
current_modified = max(self._file_mtimes(), default=0.0)
|
||||||
if current_modified > self._last_modified:
|
if current_modified > self._last_modified:
|
||||||
self._last_modified = current_modified
|
self._last_modified = current_modified
|
||||||
self.config = self._load_config()
|
self.config = self._load_config()
|
||||||
print(f"Config reloaded from {self.config_file}")
|
print(f"Config reloaded from {self.config_path}")
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def get_remote_config(self, remote_name: str) -> Optional[dict]:
|
def get_remote_config(self, remote_name: str) -> dict | None:
|
||||||
self._check_reload()
|
self._check_reload()
|
||||||
return self.config.get("remotes", {}).get(remote_name)
|
return self.config.get("remotes", {}).get(remote_name)
|
||||||
|
|
||||||
def get_repository_patterns(self, remote_name: str, repo_path: str) -> list:
|
def get_immutable_patterns(self, remote_name: str, repo_path: str = "") -> list[str]:
|
||||||
remote_config = self.get_remote_config(remote_name)
|
remote_config = self.get_remote_config(remote_name)
|
||||||
if not remote_config:
|
if not remote_config:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
repositories = remote_config.get("repositories", {})
|
repositories = remote_config.get("repositories", {})
|
||||||
|
|
||||||
# Handle both dict (GitHub style) and list (Alpine style) repositories
|
|
||||||
if isinstance(repositories, dict):
|
if isinstance(repositories, dict):
|
||||||
repo_config = repositories.get(repo_path)
|
repo_config = repositories.get(repo_path)
|
||||||
if repo_config:
|
if repo_config:
|
||||||
patterns = repo_config.get("include_patterns", [])
|
patterns = repo_config.get("immutable_patterns", [])
|
||||||
else:
|
else:
|
||||||
patterns = remote_config.get("include_patterns", [])
|
patterns = remote_config.get("immutable_patterns", [])
|
||||||
elif isinstance(repositories, list):
|
|
||||||
# For Alpine, repositories is just a list of allowed repo names
|
|
||||||
# Pattern matching is handled by the main include_patterns
|
|
||||||
patterns = remote_config.get("include_patterns", [])
|
|
||||||
else:
|
else:
|
||||||
patterns = remote_config.get("include_patterns", [])
|
patterns = remote_config.get("immutable_patterns", [])
|
||||||
|
|
||||||
return patterns
|
return patterns
|
||||||
|
|
||||||
@@ -92,9 +167,7 @@ class ConfigManager:
|
|||||||
if not redis_url:
|
if not redis_url:
|
||||||
raise ValueError("REDIS_URL environment variable is required")
|
raise ValueError("REDIS_URL environment variable is required")
|
||||||
|
|
||||||
return {
|
return {"url": redis_url}
|
||||||
"url": redis_url
|
|
||||||
}
|
|
||||||
|
|
||||||
def get_database_config(self) -> dict:
|
def get_database_config(self) -> dict:
|
||||||
"""Get database configuration from environment variables"""
|
"""Get database configuration from environment variables"""
|
||||||
@@ -105,12 +178,37 @@ class ConfigManager:
|
|||||||
db_name = os.getenv("DBNAME")
|
db_name = os.getenv("DBNAME")
|
||||||
|
|
||||||
if not all([db_host, db_port, db_user, db_pass, db_name]):
|
if not all([db_host, db_port, db_user, db_pass, db_name]):
|
||||||
missing = [var for var, val in [("DBHOST", db_host), ("DBPORT", db_port), ("DBUSER", db_user), ("DBPASS", db_pass), ("DBNAME", db_name)] if not val]
|
missing = [
|
||||||
|
var
|
||||||
|
for var, val in [("DBHOST", db_host), ("DBPORT", db_port), ("DBUSER", db_user), ("DBPASS", db_pass), ("DBNAME", db_name)]
|
||||||
|
if not val
|
||||||
|
]
|
||||||
raise ValueError(f"All database environment variables are required: {', '.join(missing)}")
|
raise ValueError(f"All database environment variables are required: {', '.join(missing)}")
|
||||||
|
|
||||||
db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}"
|
db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}"
|
||||||
return {"url": db_url}
|
return {"url": db_url}
|
||||||
|
|
||||||
|
def get_user_mutable_patterns(self, remote_name: str) -> list[str]:
|
||||||
|
"""Return only user-configured mutable_patterns, excluding package-type defaults."""
|
||||||
|
remote_config = self.get_remote_config(remote_name)
|
||||||
|
if not remote_config:
|
||||||
|
return []
|
||||||
|
return remote_config.get("mutable_patterns", [])
|
||||||
|
|
||||||
|
def get_mutable_patterns(self, remote_name: str) -> list[str]:
    """Return the mutable-file patterns that apply to a remote.

    The result starts with the defaults registered in
    ``_PACKAGE_MUTABLE_PATTERNS`` for the remote's ``package`` type
    (``"generic"`` when unset), followed by the remote's own
    ``mutable_patterns`` entries that are not already among those defaults.
    Returns ``[]`` for an unknown remote.

    The original note on this method says the TTL is configured per remote
    under ``cache.index_ttl``; that setting is not read here.
    """
    remote = self.get_remote_config(remote_name)
    if not remote:
        return []

    package_type = remote.get("package", "generic")
    builtin = _PACKAGE_MUTABLE_PATTERNS.get(package_type, [])

    # Start from a copy of the defaults so the shared table is never mutated.
    combined = builtin + []
    for pattern in remote.get("mutable_patterns", []):
        if pattern not in builtin:
            combined.append(pattern)
    return combined
|
||||||
|
|
||||||
def get_cache_config(self, remote_name: str) -> dict:
|
def get_cache_config(self, remote_name: str) -> dict:
|
||||||
"""Get cache configuration for a specific remote"""
|
"""Get cache configuration for a specific remote"""
|
||||||
remote_config = self.get_remote_config(remote_name)
|
remote_config = self.get_remote_config(remote_name)
|
||||||
@@ -118,3 +216,16 @@ class ConfigManager:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
return remote_config.get("cache", {})
|
return remote_config.get("cache", {})
|
||||||
|
|
||||||
|
def get_quarantine_config(self, remote_name: str) -> tuple[bool, int]:
|
||||||
|
"""Return (enabled, quarantine_days) for a remote.
|
||||||
|
|
||||||
|
When enabled=True and quarantine_days>0, immutable artifacts published
|
||||||
|
within the last quarantine_days days are blocked with a 404.
|
||||||
|
"""
|
||||||
|
remote_config = self.get_remote_config(remote_name)
|
||||||
|
if not remote_config:
|
||||||
|
return False, 0
|
||||||
|
enabled = bool(remote_config.get("quarantine_new", False))
|
||||||
|
days = int(remote_config.get("quarantine_days", 0))
|
||||||
|
return enabled, days
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .postgres import DatabaseManager
|
||||||
|
|
||||||
|
__all__ = ["DatabaseManager"]
|
||||||
@@ -1,5 +1,3 @@
|
|||||||
import os
|
|
||||||
from typing import Optional
|
|
||||||
import psycopg2
|
import psycopg2
|
||||||
from psycopg2.extras import RealDictCursor
|
from psycopg2.extras import RealDictCursor
|
||||||
|
|
||||||
@@ -11,7 +9,6 @@ class DatabaseManager:
|
|||||||
self._init_database()
|
self._init_database()
|
||||||
|
|
||||||
def _init_database(self):
|
def _init_database(self):
|
||||||
"""Initialize database connection and create schema if needed"""
|
|
||||||
try:
|
try:
|
||||||
self.connection = psycopg2.connect(self.db_url)
|
self.connection = psycopg2.connect(self.db_url)
|
||||||
self.connection.autocommit = True
|
self.connection.autocommit = True
|
||||||
@@ -23,10 +20,8 @@ class DatabaseManager:
|
|||||||
self.available = False
|
self.available = False
|
||||||
|
|
||||||
def _create_schema(self):
|
def _create_schema(self):
|
||||||
"""Create tables if they don't exist"""
|
|
||||||
try:
|
try:
|
||||||
with self.connection.cursor() as cursor:
|
with self.connection.cursor() as cursor:
|
||||||
# Create table to map S3 keys to remote names
|
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS artifact_mappings (
|
CREATE TABLE IF NOT EXISTS artifact_mappings (
|
||||||
id SERIAL PRIMARY KEY,
|
id SERIAL PRIMARY KEY,
|
||||||
@@ -53,27 +48,15 @@ class DatabaseManager:
|
|||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# Create indexes separately
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)")
|
||||||
cursor.execute(
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)")
|
||||||
"CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)"
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)")
|
||||||
)
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_s3_key ON local_files (s3_key)")
|
||||||
cursor.execute(
|
|
||||||
"CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)"
|
|
||||||
)
|
|
||||||
cursor.execute(
|
|
||||||
"CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)"
|
|
||||||
)
|
|
||||||
cursor.execute(
|
|
||||||
"CREATE INDEX IF NOT EXISTS idx_local_s3_key ON local_files (s3_key)"
|
|
||||||
)
|
|
||||||
print("Database schema initialized")
|
print("Database schema initialized")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error creating schema: {e}")
|
print(f"Error creating schema: {e}")
|
||||||
|
|
||||||
def record_artifact_mapping(
|
def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int):
|
||||||
self, s3_key: str, remote_name: str, file_path: str, size_bytes: int
|
|
||||||
):
|
|
||||||
"""Record mapping between S3 key and remote"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -95,7 +78,6 @@ class DatabaseManager:
|
|||||||
print(f"Error recording artifact mapping: {e}")
|
print(f"Error recording artifact mapping: {e}")
|
||||||
|
|
||||||
def get_storage_by_remote(self) -> dict[str, int]:
|
def get_storage_by_remote(self) -> dict[str, int]:
|
||||||
"""Get storage size breakdown by remote from database"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@@ -112,8 +94,7 @@ class DatabaseManager:
|
|||||||
print(f"Error getting storage by remote: {e}")
|
print(f"Error getting storage by remote: {e}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def get_remote_for_s3_key(self, s3_key: str) -> Optional[str]:
|
def get_remote_for_s3_key(self, s3_key: str) -> str | None:
|
||||||
"""Get remote name for given S3 key"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -138,7 +119,6 @@ class DatabaseManager:
|
|||||||
sha256_sum: str,
|
sha256_sum: str,
|
||||||
content_type: str = None,
|
content_type: str = None,
|
||||||
):
|
):
|
||||||
"""Add a file to local repository"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -165,7 +145,6 @@ class DatabaseManager:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def get_local_file_metadata(self, repository_name: str, file_path: str):
|
def get_local_file_metadata(self, repository_name: str, file_path: str):
|
||||||
"""Get metadata for a local file"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -197,7 +176,6 @@ class DatabaseManager:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def list_local_files(self, repository_name: str, prefix: str = ""):
|
def list_local_files(self, repository_name: str, prefix: str = ""):
|
||||||
"""List files in local repository with optional path prefix"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@@ -241,7 +219,6 @@ class DatabaseManager:
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
def delete_local_file(self, repository_name: str, file_path: str):
|
def delete_local_file(self, repository_name: str, file_path: str):
|
||||||
"""Delete a file from local repository"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -263,7 +240,6 @@ class DatabaseManager:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def file_exists(self, repository_name: str, file_path: str):
|
def file_exists(self, repository_name: str, file_path: str):
|
||||||
"""Check if file exists in local repository"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -1,96 +1,19 @@
|
|||||||
import time
|
from .auth.docker import (
|
||||||
import logging
|
_cache_key,
|
||||||
import re
|
_get_cached_token,
|
||||||
from typing import Optional
|
_store_token,
|
||||||
import httpx
|
_token_cache,
|
||||||
|
fetch_token,
|
||||||
logger = logging.getLogger(__name__)
|
get_docker_token_for_response,
|
||||||
|
parse_www_authenticate,
|
||||||
# In-memory token cache: key -> (token, expires_at)
|
|
||||||
_token_cache: dict[str, tuple[str, float]] = {}
|
|
||||||
|
|
||||||
_WWW_AUTH_RE = re.compile(
|
|
||||||
r'Bearer\s+realm="(?P<realm>[^"]+)"'
|
|
||||||
r'(?:,service="(?P<service>[^"]*)")?'
|
|
||||||
r'(?:,scope="(?P<scope>[^"]*)")?',
|
|
||||||
re.IGNORECASE,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
def _cache_key(realm: str, service: str, scope: str, username: Optional[str]) -> str:
|
"_cache_key",
|
||||||
return f"{realm}|{service}|{scope}|{username or ''}"
|
"_get_cached_token",
|
||||||
|
"_store_token",
|
||||||
|
"_token_cache",
|
||||||
def _get_cached_token(key: str) -> Optional[str]:
|
"fetch_token",
|
||||||
entry = _token_cache.get(key)
|
"get_docker_token_for_response",
|
||||||
if entry and entry[1] > time.time():
|
"parse_www_authenticate",
|
||||||
return entry[0]
|
]
|
||||||
_token_cache.pop(key, None)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _store_token(key: str, token: str, expires_in: int) -> None:
|
|
||||||
# Expire 30s early to avoid using a token right as it expires
|
|
||||||
_token_cache[key] = (token, time.time() + max(expires_in - 30, 10))
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_token(
|
|
||||||
realm: str,
|
|
||||||
service: str,
|
|
||||||
scope: str,
|
|
||||||
username: Optional[str] = None,
|
|
||||||
password: Optional[str] = None,
|
|
||||||
) -> Optional[str]:
|
|
||||||
"""Fetch a Bearer token from a Docker registry auth server."""
|
|
||||||
key = _cache_key(realm, service, scope, username)
|
|
||||||
cached = _get_cached_token(key)
|
|
||||||
if cached:
|
|
||||||
return cached
|
|
||||||
|
|
||||||
params: dict[str, str] = {}
|
|
||||||
if service:
|
|
||||||
params["service"] = service
|
|
||||||
if scope:
|
|
||||||
params["scope"] = scope
|
|
||||||
|
|
||||||
auth = (username, password) if username and password else None
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
|
||||||
response = await client.get(realm, params=params, auth=auth)
|
|
||||||
response.raise_for_status()
|
|
||||||
data = response.json()
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Docker token fetch failed ({realm}): {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
token = data.get("token") or data.get("access_token")
|
|
||||||
if not token:
|
|
||||||
logger.warning(f"Docker token response missing token field: {data}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
expires_in = int(data.get("expires_in", 300))
|
|
||||||
_store_token(key, token, expires_in)
|
|
||||||
logger.debug(f"Docker token obtained (realm={realm}, service={service}, scope={scope}, expires_in={expires_in}s)")
|
|
||||||
return token
|
|
||||||
|
|
||||||
|
|
||||||
def parse_www_authenticate(header: str) -> Optional[tuple[str, str, str]]:
|
|
||||||
"""Parse WWW-Authenticate: Bearer header. Returns (realm, service, scope) or None."""
|
|
||||||
m = _WWW_AUTH_RE.search(header)
|
|
||||||
if not m:
|
|
||||||
return None
|
|
||||||
return m.group("realm"), m.group("service") or "", m.group("scope") or ""
|
|
||||||
|
|
||||||
|
|
||||||
async def get_docker_token_for_response(
|
|
||||||
www_authenticate: str,
|
|
||||||
username: Optional[str] = None,
|
|
||||||
password: Optional[str] = None,
|
|
||||||
) -> Optional[str]:
|
|
||||||
"""Given a WWW-Authenticate header value, fetch and return a Bearer token."""
|
|
||||||
parsed = parse_www_authenticate(www_authenticate)
|
|
||||||
if not parsed:
|
|
||||||
return None
|
|
||||||
realm, service, scope = parsed
|
|
||||||
return await fetch_token(realm, service, scope, username, password)
|
|
||||||
|
|||||||
+58
-732
@@ -1,28 +1,44 @@
|
|||||||
import os
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
import hashlib
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any, Optional
|
import os
|
||||||
import httpx
|
|
||||||
from fastapi import FastAPI, HTTPException, Response, Request, Query, File, UploadFile
|
from fastapi import FastAPI, File, Query, Request, UploadFile
|
||||||
from fastapi.responses import PlainTextResponse, JSONResponse
|
from fastapi.responses import PlainTextResponse
|
||||||
|
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from importlib.metadata import version
|
from importlib.metadata import version
|
||||||
|
|
||||||
__version__ = version("artifactapi")
|
__version__ = version("artifactapi")
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# Fallback for development when package isn't installed
|
|
||||||
__version__ = "dev"
|
__version__ = "dev"
|
||||||
|
|
||||||
|
from .artifact import discovery, flush, local, proxy
|
||||||
|
from .artifact import docker as docker_handler
|
||||||
|
from .cache import RedisCache
|
||||||
from .config import ConfigManager
|
from .config import ConfigManager
|
||||||
from .database import DatabaseManager
|
from .database import DatabaseManager
|
||||||
from .storage import S3Storage
|
|
||||||
from .cache import RedisCache
|
|
||||||
from .metrics import MetricsManager
|
from .metrics import MetricsManager
|
||||||
from .docker_auth import get_docker_token_for_response
|
from .storage import S3Storage
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
app = FastAPI(title="Artifact Storage API", version=__version__)
|
||||||
|
|
||||||
|
config_path = os.environ.get("CONFIG_PATH")
|
||||||
|
if not config_path:
|
||||||
|
raise ValueError("CONFIG_PATH environment variable is required")
|
||||||
|
config = ConfigManager(config_path)
|
||||||
|
|
||||||
|
s3_config = config.get_s3_config()
|
||||||
|
redis_config = config.get_redis_config()
|
||||||
|
db_config = config.get_database_config()
|
||||||
|
|
||||||
|
storage = S3Storage(**s3_config)
|
||||||
|
cache = RedisCache(redis_config["url"])
|
||||||
|
database = DatabaseManager(db_config["url"])
|
||||||
|
metrics = MetricsManager(cache, database)
|
||||||
|
|
||||||
|
|
||||||
class ArtifactRequest(BaseModel):
|
class ArtifactRequest(BaseModel):
|
||||||
@@ -30,41 +46,10 @@ class ArtifactRequest(BaseModel):
|
|||||||
include_pattern: str
|
include_pattern: str
|
||||||
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
||||||
)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
app = FastAPI(title="Artifact Storage API", version=__version__)
|
|
||||||
|
|
||||||
# Initialize components using config
|
|
||||||
config_path = os.environ.get("CONFIG_PATH")
|
|
||||||
if not config_path:
|
|
||||||
raise ValueError("CONFIG_PATH environment variable is required")
|
|
||||||
config = ConfigManager(config_path)
|
|
||||||
|
|
||||||
# Get configurations
|
|
||||||
s3_config = config.get_s3_config()
|
|
||||||
redis_config = config.get_redis_config()
|
|
||||||
db_config = config.get_database_config()
|
|
||||||
|
|
||||||
# Initialize services
|
|
||||||
storage = S3Storage(**s3_config)
|
|
||||||
cache = RedisCache(redis_config["url"])
|
|
||||||
database = DatabaseManager(db_config["url"])
|
|
||||||
metrics = MetricsManager(cache, database)
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
def read_root():
|
def read_root():
|
||||||
config._check_reload()
|
config._check_reload()
|
||||||
return {
|
return {"message": "Artifact Storage API", "version": app.version, "remotes": list(config.config.get("remotes", {}).keys())}
|
||||||
"message": "Artifact Storage API",
|
|
||||||
"version": app.version,
|
|
||||||
"remotes": list(config.config.get("remotes", {}).keys()),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
@@ -72,725 +57,66 @@ def health_check():
|
|||||||
return {"status": "healthy"}
|
return {"status": "healthy"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/config")
|
||||||
|
def get_config():
|
||||||
|
return config.config
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/metrics")
|
||||||
|
def get_metrics(json: bool | None = Query(False, description="Return JSON format instead of Prometheus")):
|
||||||
|
config._check_reload()
|
||||||
|
if json:
|
||||||
|
return metrics.get_metrics(storage, config)
|
||||||
|
metrics.get_metrics(storage, config)
|
||||||
|
return PlainTextResponse(generate_latest().decode("utf-8"), media_type=CONTENT_TYPE_LATEST)
|
||||||
|
|
||||||
|
|
||||||
@app.put("/cache/flush")
|
@app.put("/cache/flush")
|
||||||
def flush_cache(
|
def flush_cache(
|
||||||
remote: str = Query(default=None, description="Specific remote to flush (optional)"),
|
remote: str = Query(default=None, description="Specific remote to flush (optional)"),
|
||||||
cache_type: str = Query(default="all", description="Type to flush: 'all', 'index', 'files', 'metrics'")
|
cache_type: str = Query(default="all", description="Type to flush: 'all', 'index', 'files', 'metrics'"),
|
||||||
):
|
):
|
||||||
"""Flush cache entries for specified remote or all remotes"""
|
return flush.handle(remote, cache_type, cache, storage)
|
||||||
try:
|
|
||||||
result = {
|
|
||||||
"remote": remote,
|
|
||||||
"cache_type": cache_type,
|
|
||||||
"flushed": {
|
|
||||||
"redis_keys": 0,
|
|
||||||
"s3_objects": 0,
|
|
||||||
"operations": []
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Flush Redis entries based on cache_type
|
|
||||||
if cache_type in ["all", "index", "metrics"] and cache.available and cache.client:
|
|
||||||
patterns = []
|
|
||||||
|
|
||||||
if cache_type in ["all", "index"]:
|
|
||||||
if remote:
|
|
||||||
patterns.append(f"index:{remote}:*")
|
|
||||||
else:
|
|
||||||
patterns.append("index:*")
|
|
||||||
|
|
||||||
if cache_type in ["all", "metrics"]:
|
|
||||||
if remote:
|
|
||||||
patterns.append(f"metrics:*:{remote}")
|
|
||||||
else:
|
|
||||||
patterns.append("metrics:*")
|
|
||||||
|
|
||||||
for pattern in patterns:
|
|
||||||
keys = cache.client.keys(pattern)
|
|
||||||
if keys:
|
|
||||||
cache.client.delete(*keys)
|
|
||||||
result["flushed"]["redis_keys"] += len(keys)
|
|
||||||
logger.info(f"Cache flush: Deleted {len(keys)} Redis keys matching '{pattern}'")
|
|
||||||
|
|
||||||
if result["flushed"]["redis_keys"] > 0:
|
|
||||||
result["flushed"]["operations"].append(f"Deleted {result['flushed']['redis_keys']} Redis keys")
|
|
||||||
|
|
||||||
# Flush S3 objects if requested
|
|
||||||
if cache_type in ["all", "files"]:
|
|
||||||
try:
|
|
||||||
# Use prefix filtering for remote-specific deletion
|
|
||||||
list_params = {"Bucket": storage.bucket}
|
|
||||||
if remote:
|
|
||||||
list_params["Prefix"] = f"{remote}/"
|
|
||||||
|
|
||||||
response = storage.client.list_objects_v2(**list_params)
|
|
||||||
if 'Contents' in response:
|
|
||||||
objects_to_delete = [obj['Key'] for obj in response['Contents']]
|
|
||||||
|
|
||||||
for key in objects_to_delete:
|
|
||||||
try:
|
|
||||||
storage.client.delete_object(Bucket=storage.bucket, Key=key)
|
|
||||||
result["flushed"]["s3_objects"] += 1
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to delete S3 object {key}: {e}")
|
|
||||||
|
|
||||||
if objects_to_delete:
|
|
||||||
scope = f" for remote '{remote}'" if remote else ""
|
|
||||||
result["flushed"]["operations"].append(f"Deleted {len(objects_to_delete)} S3 objects{scope}")
|
|
||||||
logger.info(f"Cache flush: Deleted {len(objects_to_delete)} S3 objects{scope}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
result["flushed"]["operations"].append(f"S3 flush failed: {str(e)}")
|
|
||||||
logger.error(f"Cache flush S3 error: {e}")
|
|
||||||
|
|
||||||
if not result["flushed"]["operations"]:
|
|
||||||
result["flushed"]["operations"].append("No cache entries found to flush")
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Cache flush error: {e}")
|
|
||||||
raise HTTPException(status_code=500, detail=f"Cache flush failed: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
async def construct_remote_url(remote_name: str, path: str) -> str:
|
|
||||||
remote_config = config.get_remote_config(remote_name)
|
|
||||||
if not remote_config:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
|
||||||
)
|
|
||||||
|
|
||||||
base_url = remote_config.get("base_url")
|
|
||||||
if not base_url:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=500, detail=f"No base_url configured for remote '{remote_name}'"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Handle Docker registry URLs
|
|
||||||
if remote_config.get("package") == "docker":
|
|
||||||
# Convert Docker paths to v2 API format
|
|
||||||
# e.g., library/nginx/manifests/latest -> v2/library/nginx/manifests/latest
|
|
||||||
return f"{base_url}/v2/{path}"
|
|
||||||
|
|
||||||
return f"{base_url}/{path}"
|
|
||||||
|
|
||||||
|
|
||||||
async def check_artifact_patterns(
|
|
||||||
remote_name: str, repo_path: str, file_path: str, full_path: str
|
|
||||||
) -> bool:
|
|
||||||
# First check if this is an index file - always allow index files
|
|
||||||
if cache.is_index_file(file_path) or cache.is_index_file(full_path):
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Then check basic include patterns
|
|
||||||
patterns = config.get_repository_patterns(remote_name, repo_path)
|
|
||||||
if not patterns:
|
|
||||||
return True # Allow all if no patterns configured
|
|
||||||
|
|
||||||
pattern_matched = False
|
|
||||||
for pattern in patterns:
|
|
||||||
# Check both file_path and full_path to handle different pattern types
|
|
||||||
if re.search(pattern, file_path) or re.search(pattern, full_path):
|
|
||||||
pattern_matched = True
|
|
||||||
break
|
|
||||||
|
|
||||||
if not pattern_matched:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# All remotes now use pattern-based filtering only - no additional checks needed
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
|
|
||||||
# Use hierarchical path-based key
|
|
||||||
key = storage.get_object_key(remote_name, path)
|
|
||||||
|
|
||||||
if storage.exists(key):
|
|
||||||
logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})")
|
|
||||||
return {
|
|
||||||
"url": url,
|
|
||||||
"cached_url": storage.get_url(key),
|
|
||||||
"status": "already_cached",
|
|
||||||
}
|
|
||||||
|
|
||||||
try:
|
|
||||||
remote_config = config.get_remote_config(remote_name) or {}
|
|
||||||
is_docker = remote_config.get("package") == "docker" or "/v2/" in url
|
|
||||||
|
|
||||||
# Prepare headers for Docker registry requests
|
|
||||||
headers = {}
|
|
||||||
if is_docker:
|
|
||||||
if "/manifests/" in url:
|
|
||||||
headers["Accept"] = (
|
|
||||||
"application/vnd.docker.distribution.manifest.v2+json,"
|
|
||||||
"application/vnd.oci.image.manifest.v1+json,"
|
|
||||||
"application/vnd.oci.image.index.v1+json,"
|
|
||||||
"application/vnd.docker.distribution.manifest.list.v2+json"
|
|
||||||
)
|
|
||||||
elif "/blobs/" in url:
|
|
||||||
headers["Accept"] = "application/octet-stream"
|
|
||||||
|
|
||||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
|
||||||
response = await client.get(url, headers=headers)
|
|
||||||
|
|
||||||
# Handle Docker Bearer token challenge
|
|
||||||
if response.status_code == 401 and is_docker:
|
|
||||||
www_auth = response.headers.get("WWW-Authenticate", "")
|
|
||||||
username = remote_config.get("username")
|
|
||||||
password = remote_config.get("password")
|
|
||||||
token = await get_docker_token_for_response(www_auth, username, password)
|
|
||||||
if token:
|
|
||||||
headers["Authorization"] = f"Bearer {token}"
|
|
||||||
response = await client.get(url, headers=headers)
|
|
||||||
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
storage_path = storage.upload(key, response.content)
|
|
||||||
|
|
||||||
logger.info(f"Cache ADD SUCCESS: {url} (size: {len(response.content)} bytes, key: {key})")
|
|
||||||
|
|
||||||
return {
|
|
||||||
"url": url,
|
|
||||||
"cached_url": storage.get_url(key),
|
|
||||||
"storage_path": storage_path,
|
|
||||||
"size": len(response.content),
|
|
||||||
"status": "cached",
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
return {"url": url, "status": "error", "error": str(e)}
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/v1/remote/{remote_name}/{path:path}")
|
|
||||||
async def get_artifact(remote_name: str, path: str):
|
|
||||||
# Check if remote is configured
|
|
||||||
remote_config = config.get_remote_config(remote_name)
|
|
||||||
if not remote_config:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Check if this is a local repository
|
|
||||||
if remote_config.get("type") == "local":
|
|
||||||
# Handle local repository download
|
|
||||||
metadata = database.get_local_file_metadata(remote_name, path)
|
|
||||||
if not metadata:
|
|
||||||
raise HTTPException(status_code=404, detail="File not found")
|
|
||||||
|
|
||||||
# Get file from S3
|
|
||||||
content = storage.download_object(metadata["s3_key"])
|
|
||||||
if content is None:
|
|
||||||
raise HTTPException(status_code=500, detail="File not accessible")
|
|
||||||
|
|
||||||
# Determine content type
|
|
||||||
content_type = metadata.get("content_type", "application/octet-stream")
|
|
||||||
|
|
||||||
return Response(
|
|
||||||
content=content,
|
|
||||||
media_type=content_type,
|
|
||||||
headers={
|
|
||||||
"Content-Disposition": f"attachment; filename={os.path.basename(path)}"
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract repository path for pattern checking
|
|
||||||
path_parts = path.split("/")
|
|
||||||
if len(path_parts) >= 2:
|
|
||||||
repo_path = f"{path_parts[0]}/{path_parts[1]}"
|
|
||||||
file_path = "/".join(path_parts[2:])
|
|
||||||
else:
|
|
||||||
repo_path = path
|
|
||||||
file_path = path
|
|
||||||
|
|
||||||
# Check if artifact matches configured patterns
|
|
||||||
if not await check_artifact_patterns(remote_name, repo_path, file_path, path):
|
|
||||||
logger.info(f"PATTERN BLOCKED: {remote_name}/{path} - not matching include patterns")
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=403, detail="Artifact not allowed by configuration patterns"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Construct the remote URL
|
|
||||||
remote_url = await construct_remote_url(remote_name, path)
|
|
||||||
|
|
||||||
# Check if artifact is already cached
|
|
||||||
cached_key = storage.get_object_key(remote_name, path)
|
|
||||||
if not storage.exists(cached_key):
|
|
||||||
cached_key = None
|
|
||||||
|
|
||||||
# For index files, check Redis TTL validity
|
|
||||||
filename = os.path.basename(path)
|
|
||||||
is_index = cache.is_index_file(path) # Check full path, not just filename
|
|
||||||
|
|
||||||
if cached_key and is_index:
|
|
||||||
# Index file exists, but check if it's still valid
|
|
||||||
if not cache.is_index_valid(remote_name, path):
|
|
||||||
# Index has expired, remove it from S3
|
|
||||||
logger.info(f"Index EXPIRED: {remote_name}/{path} - removing from cache")
|
|
||||||
cache.cleanup_expired_index(storage, remote_name, path)
|
|
||||||
cached_key = None # Force re-download
|
|
||||||
|
|
||||||
if cached_key:
|
|
||||||
# Return cached artifact
|
|
||||||
try:
|
|
||||||
artifact_data = storage.download_object(cached_key)
|
|
||||||
filename = os.path.basename(path)
|
|
||||||
|
|
||||||
# Log cache hit
|
|
||||||
logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})")
|
|
||||||
|
|
||||||
# Determine content type based on file extension
|
|
||||||
content_type = "application/octet-stream"
|
|
||||||
if filename.endswith(".tar.gz"):
|
|
||||||
content_type = "application/gzip"
|
|
||||||
elif filename.endswith(".zip"):
|
|
||||||
content_type = "application/zip"
|
|
||||||
elif filename.endswith(".exe"):
|
|
||||||
content_type = "application/x-msdownload"
|
|
||||||
elif filename.endswith(".rpm"):
|
|
||||||
content_type = "application/x-rpm"
|
|
||||||
elif filename.endswith(".xml"):
|
|
||||||
content_type = "application/xml"
|
|
||||||
elif filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
|
||||||
content_type = "application/gzip"
|
|
||||||
|
|
||||||
# Record cache hit metrics
|
|
||||||
metrics.record_cache_hit(remote_name, len(artifact_data))
|
|
||||||
|
|
||||||
# Record artifact mapping in database if not already recorded
|
|
||||||
database.record_artifact_mapping(
|
|
||||||
cached_key, remote_name, path, len(artifact_data)
|
|
||||||
)
|
|
||||||
|
|
||||||
return Response(
|
|
||||||
content=artifact_data,
|
|
||||||
media_type=content_type,
|
|
||||||
headers={
|
|
||||||
"Content-Disposition": f"attachment; filename={filename}",
|
|
||||||
"X-Artifact-Source": "cache",
|
|
||||||
"X-Artifact-Size": str(len(artifact_data)),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=500, detail=f"Error retrieving cached artifact: {str(e)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Artifact not cached, cache it first
|
|
||||||
logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
|
|
||||||
result = await cache_single_artifact(remote_url, remote_name, path)
|
|
||||||
|
|
||||||
if result["status"] == "error":
|
|
||||||
logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}")
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=502, detail=f"Failed to fetch artifact: {result['error']}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Mark index files as cached in Redis if this was a new download
|
|
||||||
if result["status"] == "cached" and is_index:
|
|
||||||
# Get TTL from remote config
|
|
||||||
cache_config = config.get_cache_config(remote_name)
|
|
||||||
index_ttl = cache_config.get("index_ttl", 300) # Default 5 minutes
|
|
||||||
cache.mark_index_cached(remote_name, path, index_ttl)
|
|
||||||
logger.info(f"Index file cached with TTL: {remote_name}/{path} (ttl: {index_ttl}s)")
|
|
||||||
|
|
||||||
# Now return the cached artifact
|
|
||||||
try:
|
|
||||||
cache_key = storage.get_object_key(remote_name, path)
|
|
||||||
artifact_data = storage.download_object(cache_key)
|
|
||||||
filename = os.path.basename(path)
|
|
||||||
|
|
||||||
content_type = "application/octet-stream"
|
|
||||||
if filename.endswith(".tar.gz"):
|
|
||||||
content_type = "application/gzip"
|
|
||||||
elif filename.endswith(".zip"):
|
|
||||||
content_type = "application/zip"
|
|
||||||
elif filename.endswith(".exe"):
|
|
||||||
content_type = "application/x-msdownload"
|
|
||||||
elif filename.endswith(".rpm"):
|
|
||||||
content_type = "application/x-rpm"
|
|
||||||
elif filename.endswith(".xml"):
|
|
||||||
content_type = "application/xml"
|
|
||||||
elif filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
|
||||||
content_type = "application/gzip"
|
|
||||||
|
|
||||||
# Record cache miss metrics
|
|
||||||
metrics.record_cache_miss(remote_name, len(artifact_data))
|
|
||||||
|
|
||||||
# Record artifact mapping in database
|
|
||||||
cache_key = storage.get_object_key(remote_name, path)
|
|
||||||
database.record_artifact_mapping(
|
|
||||||
cache_key, remote_name, path, len(artifact_data)
|
|
||||||
)
|
|
||||||
|
|
||||||
return Response(
|
|
||||||
content=artifact_data,
|
|
||||||
media_type=content_type,
|
|
||||||
headers={
|
|
||||||
"Content-Disposition": f"attachment; filename={filename}",
|
|
||||||
"X-Artifact-Source": "remote",
|
|
||||||
"X-Artifact-Size": str(len(artifact_data)),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=f"Error serving artifact: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/v2/")
|
@app.get("/v2/")
|
||||||
async def docker_v2_ping():
|
async def docker_v2_ping():
|
||||||
return Response(
|
return docker_handler.ping()
|
||||||
content="{}",
|
|
||||||
media_type="application/json",
|
|
||||||
headers={"Docker-Distribution-Api-Version": "registry/2.0"},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.api_route("/v2/{remote_name}/{path:path}", methods=["GET", "HEAD"])
|
@app.api_route("/v2/{remote_name}/{path:path}", methods=["GET", "HEAD"])
|
||||||
async def docker_v2_proxy(request: Request, remote_name: str, path: str):
|
async def docker_v2_proxy(request: Request, remote_name: str, path: str):
|
||||||
remote_config = config.get_remote_config(remote_name)
|
return await docker_handler.proxy(request, remote_name, path, storage, cache, config, metrics)
|
||||||
if not remote_config:
|
|
||||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
|
||||||
if remote_config.get("package") != "docker":
|
|
||||||
raise HTTPException(status_code=400, detail=f"Remote '{remote_name}' is not a docker remote")
|
|
||||||
|
|
||||||
# Check include_patterns against the image name (e.g. "library/nginx")
|
|
||||||
patterns = config.get_repository_patterns(remote_name, "")
|
|
||||||
if patterns:
|
|
||||||
path_parts = path.split("/")
|
|
||||||
image_name = "/".join(path_parts[:2]) if len(path_parts) >= 2 else path
|
|
||||||
if not any(re.search(p, path) or re.search(p, image_name) for p in patterns):
|
|
||||||
logger.info(f"PATTERN BLOCKED: {remote_name}/{path}")
|
|
||||||
raise HTTPException(status_code=403, detail="Image not allowed by configuration patterns")
|
|
||||||
|
|
||||||
remote_url = await construct_remote_url(remote_name, path)
|
|
||||||
|
|
||||||
cached_key = storage.get_object_key(remote_name, path)
|
|
||||||
if not storage.exists(cached_key):
|
|
||||||
cached_key = None
|
|
||||||
|
|
||||||
is_index = cache.is_index_file(path)
|
|
||||||
|
|
||||||
if cached_key and is_index:
|
|
||||||
if not cache.is_index_valid(remote_name, path):
|
|
||||||
logger.info(f"Index EXPIRED: {remote_name}/{path} - removing from cache")
|
|
||||||
cache.cleanup_expired_index(storage, remote_name, path)
|
|
||||||
cached_key = None
|
|
||||||
|
|
||||||
if not cached_key:
|
|
||||||
logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
|
|
||||||
result = await cache_single_artifact(remote_url, remote_name, path)
|
|
||||||
if result["status"] == "error":
|
|
||||||
raise HTTPException(status_code=502, detail=f"Failed to fetch: {result['error']}")
|
|
||||||
if result["status"] == "cached" and is_index:
|
|
||||||
cache_config = config.get_cache_config(remote_name)
|
|
||||||
index_ttl = cache_config.get("index_ttl", 300)
|
|
||||||
cache.mark_index_cached(remote_name, path, index_ttl)
|
|
||||||
logger.info(f"Index file cached with TTL: {remote_name}/{path} (ttl: {index_ttl}s)")
|
|
||||||
|
|
||||||
artifact_data = storage.download_object(storage.get_object_key(remote_name, path))
|
|
||||||
|
|
||||||
is_blob = "/blobs/" in path
|
|
||||||
if is_blob:
|
|
||||||
content_type = "application/octet-stream"
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
manifest_json = json.loads(artifact_data)
|
|
||||||
content_type = manifest_json.get("mediaType")
|
|
||||||
if not content_type:
|
|
||||||
if "manifests" in manifest_json:
|
|
||||||
content_type = "application/vnd.oci.image.index.v1+json"
|
|
||||||
else:
|
|
||||||
content_type = "application/vnd.oci.image.manifest.v1+json"
|
|
||||||
except Exception:
|
|
||||||
content_type = "application/vnd.oci.image.manifest.v1+json"
|
|
||||||
|
|
||||||
digest = f"sha256:{hashlib.sha256(artifact_data).hexdigest()}"
|
|
||||||
headers = {
|
|
||||||
"Docker-Distribution-Api-Version": "registry/2.0",
|
|
||||||
"Docker-Content-Digest": digest,
|
|
||||||
"Content-Length": str(len(artifact_data)),
|
|
||||||
}
|
|
||||||
|
|
||||||
if request.method == "HEAD":
|
|
||||||
return Response(status_code=200, headers=headers, media_type=content_type)
|
|
||||||
|
|
||||||
metrics.record_cache_hit(remote_name, len(artifact_data))
|
|
||||||
return Response(content=artifact_data, media_type=content_type, headers=headers)
|
|
||||||
|
|
||||||
|
|
||||||
async def discover_artifacts(remote: str, include_pattern: str) -> list[str]:
|
@app.get("/api/v1/remote/{remote_name}/{path:path}")
|
||||||
if "github.com" in remote:
|
async def get_artifact(request: Request, remote_name: str, path: str):
|
||||||
return await discover_github_releases(remote, include_pattern)
|
return await proxy.handle(request, remote_name, path, storage, cache, config, database, metrics)
|
||||||
else:
|
|
||||||
raise HTTPException(status_code=400, detail=f"Unsupported remote: {remote}")
|
|
||||||
|
|
||||||
|
|
||||||
async def discover_github_releases(remote: str, include_pattern: str) -> list[str]:
|
|
||||||
match = re.match(r"github\.com/([^/]+)/([^/]+)", remote)
|
|
||||||
if not match:
|
|
||||||
raise HTTPException(status_code=400, detail="Invalid GitHub remote format")
|
|
||||||
|
|
||||||
owner, repo = match.groups()
|
|
||||||
|
|
||||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
|
||||||
response = await client.get(
|
|
||||||
f"https://api.github.com/repos/{owner}/{repo}/releases"
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status_code != 200:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=response.status_code,
|
|
||||||
detail=f"Failed to fetch releases: {response.text}",
|
|
||||||
)
|
|
||||||
|
|
||||||
releases = response.json()
|
|
||||||
|
|
||||||
matching_urls = []
|
|
||||||
pattern = include_pattern.replace("*", ".*")
|
|
||||||
regex = re.compile(pattern)
|
|
||||||
|
|
||||||
for release in releases:
|
|
||||||
for asset in release.get("assets", []):
|
|
||||||
download_url = asset["browser_download_url"]
|
|
||||||
if regex.search(download_url):
|
|
||||||
matching_urls.append(download_url)
|
|
||||||
|
|
||||||
return matching_urls
|
|
||||||
|
|
||||||
|
|
||||||
@app.put("/api/v1/remote/{remote_name}/{path:path}")
|
@app.put("/api/v1/remote/{remote_name}/{path:path}")
|
||||||
async def upload_file(remote_name: str, path: str, file: UploadFile = File(...)):
|
async def upload_file(remote_name: str, path: str, file: UploadFile = File(...)):
|
||||||
"""Upload a file to local repository"""
|
return await local.upload(remote_name, path, file, storage, database, config)
|
||||||
# Check if remote is configured and is local
|
|
||||||
remote_config = config.get_remote_config(remote_name)
|
|
||||||
if not remote_config:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
|
||||||
)
|
|
||||||
|
|
||||||
if remote_config.get("type") != "local":
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400, detail="Upload only supported for local repositories"
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Read file content
|
|
||||||
content = await file.read()
|
|
||||||
|
|
||||||
# Calculate SHA256
|
|
||||||
sha256_sum = hashlib.sha256(content).hexdigest()
|
|
||||||
|
|
||||||
# Check if file already exists (prevent overwrite)
|
|
||||||
if database.file_exists(remote_name, path):
|
|
||||||
raise HTTPException(status_code=409, detail="File already exists")
|
|
||||||
|
|
||||||
# Generate S3 key
|
|
||||||
s3_key = f"local/{remote_name}/{path}"
|
|
||||||
|
|
||||||
# Determine content type
|
|
||||||
content_type = file.content_type or "application/octet-stream"
|
|
||||||
|
|
||||||
# Upload to S3
|
|
||||||
try:
|
|
||||||
storage.upload(s3_key, content)
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=f"Upload failed: {e}")
|
|
||||||
|
|
||||||
# Add to database
|
|
||||||
success = database.add_local_file(
|
|
||||||
repository_name=remote_name,
|
|
||||||
file_path=path,
|
|
||||||
s3_key=s3_key,
|
|
||||||
size_bytes=len(content),
|
|
||||||
sha256_sum=sha256_sum,
|
|
||||||
content_type=content_type,
|
|
||||||
)
|
|
||||||
|
|
||||||
if not success:
|
|
||||||
# Clean up S3 if database insert failed
|
|
||||||
storage.delete_object(s3_key)
|
|
||||||
raise HTTPException(status_code=500, detail="Failed to save file metadata")
|
|
||||||
|
|
||||||
return JSONResponse(
|
|
||||||
{
|
|
||||||
"message": "File uploaded successfully",
|
|
||||||
"file_path": path,
|
|
||||||
"size_bytes": len(content),
|
|
||||||
"sha256_sum": sha256_sum,
|
|
||||||
"content_type": content_type,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
@app.head("/api/v1/remote/{remote_name}/{path:path}")
|
@app.head("/api/v1/remote/{remote_name}/{path:path}")
|
||||||
def check_file_exists(remote_name: str, path: str):
|
def check_file_exists(remote_name: str, path: str):
|
||||||
"""Check if file exists (for CI jobs) - supports local repositories only"""
|
return local.check_exists(remote_name, path, database, config)
|
||||||
# Check if remote is configured
|
|
||||||
remote_config = config.get_remote_config(remote_name)
|
|
||||||
if not remote_config:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Handle local repository
|
|
||||||
if remote_config.get("type") == "local":
|
|
||||||
try:
|
|
||||||
metadata = database.get_local_file_metadata(remote_name, path)
|
|
||||||
if not metadata:
|
|
||||||
raise HTTPException(status_code=404, detail="File not found")
|
|
||||||
|
|
||||||
return Response(
|
|
||||||
headers={
|
|
||||||
"Content-Length": str(metadata["size_bytes"]),
|
|
||||||
"Content-Type": metadata.get(
|
|
||||||
"content_type", "application/octet-stream"
|
|
||||||
),
|
|
||||||
"X-SHA256": metadata["sha256_sum"],
|
|
||||||
"X-Created-At": metadata["created_at"].isoformat()
|
|
||||||
if metadata["created_at"]
|
|
||||||
else "",
|
|
||||||
"X-Uploaded-At": metadata["uploaded_at"].isoformat()
|
|
||||||
if metadata["uploaded_at"]
|
|
||||||
else "",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")
|
|
||||||
else:
|
|
||||||
# For remote repositories, just return 405 Method Not Allowed
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=405, detail="HEAD method only supported for local repositories"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.delete("/api/v1/remote/{remote_name}/{path:path}")
|
@app.delete("/api/v1/remote/{remote_name}/{path:path}")
|
||||||
def delete_file(remote_name: str, path: str):
|
def delete_file(remote_name: str, path: str):
|
||||||
"""Delete a file from local repository"""
|
return local.delete(remote_name, path, storage, database, config)
|
||||||
# Check if remote is configured and is local
|
|
||||||
remote_config = config.get_remote_config(remote_name)
|
|
||||||
if not remote_config:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
|
||||||
)
|
|
||||||
|
|
||||||
if remote_config.get("type") != "local":
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400, detail="Delete only supported for local repositories"
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Get S3 key before deleting from database
|
|
||||||
s3_key = database.delete_local_file(remote_name, path)
|
|
||||||
if not s3_key:
|
|
||||||
raise HTTPException(status_code=404, detail="File not found")
|
|
||||||
|
|
||||||
# Delete from S3
|
|
||||||
if not storage.delete_object(s3_key):
|
|
||||||
# File was deleted from database but not from S3 - log warning but continue
|
|
||||||
print(f"Warning: Failed to delete S3 object {s3_key}")
|
|
||||||
|
|
||||||
return JSONResponse({"message": "File deleted successfully"})
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=f"Delete failed: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/v1/artifacts/cache")
|
@app.post("/api/v1/artifacts/cache")
|
||||||
async def cache_artifact(request: ArtifactRequest) -> Dict[str, Any]:
|
async def cache_artifact(request: ArtifactRequest):
|
||||||
try:
|
return await discovery.cache_artifacts(request.remote, request.include_pattern, storage)
|
||||||
matching_urls = await discover_artifacts(
|
|
||||||
request.remote, request.include_pattern
|
|
||||||
)
|
|
||||||
|
|
||||||
if not matching_urls:
|
|
||||||
return {
|
|
||||||
"message": "No matching artifacts found",
|
|
||||||
"cached_count": 0,
|
|
||||||
"artifacts": [],
|
|
||||||
}
|
|
||||||
|
|
||||||
cached_artifacts = []
|
|
||||||
|
|
||||||
for url in matching_urls:
|
|
||||||
result = await cache_single_artifact(url, "", "")
|
|
||||||
cached_artifacts.append(result)
|
|
||||||
|
|
||||||
cached_count = sum(
|
|
||||||
1
|
|
||||||
for artifact in cached_artifacts
|
|
||||||
if artifact["status"] in ["cached", "already_cached"]
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"message": f"Processed {len(matching_urls)} artifacts, {cached_count} successfully cached",
|
|
||||||
"cached_count": cached_count,
|
|
||||||
"artifacts": cached_artifacts,
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/v1/artifacts/{remote:path}")
|
@app.get("/api/v1/artifacts/{remote:path}")
|
||||||
async def list_cached_artifacts(
|
async def list_cached_artifacts(remote: str, include_pattern: str = ".*"):
|
||||||
remote: str, include_pattern: str = ".*"
|
return await discovery.list_artifacts(remote, include_pattern, storage)
|
||||||
) -> Dict[str, Any]:
|
|
||||||
try:
|
|
||||||
matching_urls = await discover_artifacts(remote, include_pattern)
|
|
||||||
|
|
||||||
cached_artifacts = []
|
|
||||||
for url in matching_urls:
|
|
||||||
# Extract path from URL for hierarchical key generation
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
parsed = urlparse(url)
|
|
||||||
path = parsed.path
|
|
||||||
key = storage.get_object_key(remote, path)
|
|
||||||
if storage.exists(key):
|
|
||||||
cached_artifacts.append(
|
|
||||||
{"url": url, "cached_url": storage.get_url(key), "key": key}
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"remote": remote,
|
|
||||||
"pattern": include_pattern,
|
|
||||||
"total_found": len(matching_urls),
|
|
||||||
"cached_count": len(cached_artifacts),
|
|
||||||
"artifacts": cached_artifacts,
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/metrics")
def get_metrics(
    json: Optional[bool] = Query(False, description="Return JSON format instead of Prometheus"),
):
    """Get comprehensive metrics about the artifact storage system"""
    config._check_reload()

    # The metrics manager is queried in both modes: its return value is the
    # JSON payload, and the call also updates the Prometheus gauges.
    snapshot = metrics.get_metrics(storage, config)
    if json:
        return snapshot

    # Prometheus mode: serialise the registry with the matching content type.
    exposition = generate_latest().decode("utf-8")
    return PlainTextResponse(exposition, media_type=CONTENT_TYPE_LATEST)
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/config")
def get_config():
    # Expose the configuration mapping currently held by the config manager.
    current_config = config.config
    return current_config
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|||||||
+18
-51
@@ -1,22 +1,14 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Dict, Any
|
from typing import Any
|
||||||
|
|
||||||
from prometheus_client import Counter, Gauge
|
from prometheus_client import Counter, Gauge
|
||||||
|
|
||||||
|
|
||||||
# Prometheus metrics
|
# Prometheus metrics
|
||||||
# Counters labelled per remote (request_counter is additionally labelled by status).
request_counter = Counter(
    "artifact_requests_total",
    "Total artifact requests",
    labelnames=["remote", "status"],
)
cache_hit_counter = Counter(
    "artifact_cache_hits_total",
    "Total cache hits",
    labelnames=["remote"],
)
cache_miss_counter = Counter(
    "artifact_cache_misses_total",
    "Total cache misses",
    labelnames=["remote"],
)
bandwidth_saved_counter = Counter(
    "artifact_bandwidth_saved_bytes_total",
    "Total bandwidth saved",
    labelnames=["remote"],
)

# Gauges: per-remote storage size and an unlabelled Redis key count.
storage_size_gauge = Gauge(
    "artifact_storage_size_bytes",
    "Storage size by remote",
    labelnames=["remote"],
)
redis_keys_gauge = Gauge("artifact_redis_keys_total", "Total Redis keys")
|
||||||
|
|
||||||
|
|
||||||
@@ -44,9 +36,7 @@ class MetricsManager:
|
|||||||
# Increment per-remote counters
|
# Increment per-remote counters
|
||||||
self.redis_client.client.incr(f"metrics:cache_hits:{remote_name}")
|
self.redis_client.client.incr(f"metrics:cache_hits:{remote_name}")
|
||||||
self.redis_client.client.incr(f"metrics:total_requests:{remote_name}")
|
self.redis_client.client.incr(f"metrics:total_requests:{remote_name}")
|
||||||
self.redis_client.client.incrby(
|
self.redis_client.client.incrby(f"metrics:bandwidth_saved:{remote_name}", size_bytes)
|
||||||
f"metrics:bandwidth_saved:{remote_name}", size_bytes
|
|
||||||
)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -91,7 +81,7 @@ class MetricsManager:
|
|||||||
except Exception:
|
except Exception:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def get_s3_size_by_remote(self, storage, config_manager) -> Dict[str, int]:
|
def get_s3_size_by_remote(self, storage, config_manager) -> dict[str, int]:
|
||||||
"""Get size of stored data per remote using database mappings"""
|
"""Get size of stored data per remote using database mappings"""
|
||||||
if self.database_manager and self.database_manager.available:
|
if self.database_manager and self.database_manager.available:
|
||||||
# Get from database if available
|
# Get from database if available
|
||||||
@@ -146,7 +136,7 @@ class MetricsManager:
|
|||||||
except Exception:
|
except Exception:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def get_metrics(self, storage, config_manager) -> Dict[str, Any]:
|
def get_metrics(self, storage, config_manager) -> dict[str, Any]:
|
||||||
"""Get comprehensive metrics"""
|
"""Get comprehensive metrics"""
|
||||||
# Update Redis keys gauge
|
# Update Redis keys gauge
|
||||||
redis_key_count = self.get_redis_key_count()
|
redis_key_count = self.get_redis_key_count()
|
||||||
@@ -173,54 +163,31 @@ class MetricsManager:
|
|||||||
if self.redis_client and self.redis_client.available:
|
if self.redis_client and self.redis_client.available:
|
||||||
try:
|
try:
|
||||||
# Get global metrics
|
# Get global metrics
|
||||||
cache_hits = int(
|
cache_hits = int(self.redis_client.client.get("metrics:cache_hits") or 0)
|
||||||
self.redis_client.client.get("metrics:cache_hits") or 0
|
cache_misses = int(self.redis_client.client.get("metrics:cache_misses") or 0)
|
||||||
)
|
|
||||||
cache_misses = int(
|
|
||||||
self.redis_client.client.get("metrics:cache_misses") or 0
|
|
||||||
)
|
|
||||||
total_requests = cache_hits + cache_misses
|
total_requests = cache_hits + cache_misses
|
||||||
bandwidth_saved = int(
|
bandwidth_saved = int(self.redis_client.client.get("metrics:bandwidth_saved") or 0)
|
||||||
self.redis_client.client.get("metrics:bandwidth_saved") or 0
|
|
||||||
)
|
|
||||||
|
|
||||||
metrics["requests"]["cache_hits"] = cache_hits
|
metrics["requests"]["cache_hits"] = cache_hits
|
||||||
metrics["requests"]["cache_misses"] = cache_misses
|
metrics["requests"]["cache_misses"] = cache_misses
|
||||||
metrics["requests"]["total_requests"] = total_requests
|
metrics["requests"]["total_requests"] = total_requests
|
||||||
metrics["requests"]["cache_hit_ratio"] = (
|
metrics["requests"]["cache_hit_ratio"] = cache_hits / total_requests if total_requests > 0 else 0.0
|
||||||
cache_hits / total_requests if total_requests > 0 else 0.0
|
|
||||||
)
|
|
||||||
metrics["bandwidth"]["saved_bytes"] = bandwidth_saved
|
metrics["bandwidth"]["saved_bytes"] = bandwidth_saved
|
||||||
|
|
||||||
# Get per-remote metrics
|
# Get per-remote metrics
|
||||||
for remote in config_manager.config.get("remotes", {}).keys():
|
for remote in config_manager.config.get("remotes", {}).keys():
|
||||||
remote_cache_hits = int(
|
remote_cache_hits = int(self.redis_client.client.get(f"metrics:cache_hits:{remote}") or 0)
|
||||||
self.redis_client.client.get(f"metrics:cache_hits:{remote}")
|
remote_cache_misses = int(self.redis_client.client.get(f"metrics:cache_misses:{remote}") or 0)
|
||||||
or 0
|
|
||||||
)
|
|
||||||
remote_cache_misses = int(
|
|
||||||
self.redis_client.client.get(f"metrics:cache_misses:{remote}")
|
|
||||||
or 0
|
|
||||||
)
|
|
||||||
remote_total = remote_cache_hits + remote_cache_misses
|
remote_total = remote_cache_hits + remote_cache_misses
|
||||||
remote_bandwidth_saved = int(
|
remote_bandwidth_saved = int(self.redis_client.client.get(f"metrics:bandwidth_saved:{remote}") or 0)
|
||||||
self.redis_client.client.get(
|
|
||||||
f"metrics:bandwidth_saved:{remote}"
|
|
||||||
)
|
|
||||||
or 0
|
|
||||||
)
|
|
||||||
|
|
||||||
metrics["per_remote"][remote] = {
|
metrics["per_remote"][remote] = {
|
||||||
"cache_hits": remote_cache_hits,
|
"cache_hits": remote_cache_hits,
|
||||||
"cache_misses": remote_cache_misses,
|
"cache_misses": remote_cache_misses,
|
||||||
"total_requests": remote_total,
|
"total_requests": remote_total,
|
||||||
"cache_hit_ratio": remote_cache_hits / remote_total
|
"cache_hit_ratio": remote_cache_hits / remote_total if remote_total > 0 else 0.0,
|
||||||
if remote_total > 0
|
|
||||||
else 0.0,
|
|
||||||
"bandwidth_saved_bytes": remote_bandwidth_saved,
|
"bandwidth_saved_bytes": remote_bandwidth_saved,
|
||||||
"storage_size_bytes": metrics["storage"]["size_by_remote"].get(
|
"storage_size_bytes": metrics["storage"]["size_by_remote"].get(remote, 0),
|
||||||
remote, 0
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
from . import generic, helm, npm, python, rpm
|
||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
__all__ = ["generic", "helm", "npm", "python", "rpm", "get_content_type"]
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
def get_content_type(filename: str) -> str:
    """Return the media type to report for *filename*, chosen by its suffix.

    Matching is case-sensitive and purely suffix based. Names with no
    recognised suffix fall back to ``application/octet-stream``.

    Args:
        filename: File name (or path) whose suffix is inspected.

    Returns:
        A media type string.
    """
    # Gzip-compressed payloads: tarballs and gzip-compressed XML metadata.
    if filename.endswith((".tar.gz", ".tgz", ".xml.gz")):
        return "application/gzip"
    # bzip2 / xz compressed XML metadata are not gzip streams, so they get
    # their own media types instead of being lumped in with gzip.
    if filename.endswith(".xml.bz2"):
        return "application/x-bzip2"
    if filename.endswith(".xml.xz"):
        return "application/x-xz"
    # Wheels are zip archives.
    if filename.endswith((".zip", ".whl")):
        return "application/zip"
    if filename.endswith(".exe"):
        return "application/x-msdownload"
    if filename.endswith(".rpm"):
        return "application/x-rpm"
    if filename.endswith(".xml"):
        return "application/xml"
    if filename.endswith((".yaml", ".yml")):
        return "text/yaml"
    return "application/octet-stream"
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
__all__ = ["get_content_type"]
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Return the payload and content type to serve for a Helm remote file.

    For ``index.yaml`` every occurrence of ``base_url`` in the payload is
    replaced with this proxy's ``/api/v1/remote/<remote_name>`` prefix and the
    result is reported as ``text/yaml``. Any other file is passed through
    unchanged with a suffix-derived content type. ``path`` is accepted but
    not used here.
    """
    if filename != "index.yaml":
        return data, get_content_type(filename)

    upstream_prefix = base_url.encode()
    proxied_prefix = f"{proxy_url}/api/v1/remote/{remote_name}".encode()
    return data.replace(upstream_prefix, proxied_prefix), "text/yaml"
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    immutable_patterns: list[str],
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Return the payload and content type to serve for an npm remote file.

    A path matching any of ``immutable_patterns`` (``re.search``) is passed
    through untouched with a suffix-derived content type. Every other path
    has each occurrence of ``base_url`` replaced with this proxy's
    ``/api/v1/remote/<remote_name>`` prefix and is reported as
    ``application/json``.
    """
    is_immutable = any(re.search(pattern, path) for pattern in immutable_patterns)
    if is_immutable:
        return data, get_content_type(filename)

    upstream_prefix = base_url.encode()
    proxied_prefix = f"{proxy_url}/api/v1/remote/{remote_name}".encode()
    return data.replace(upstream_prefix, proxied_prefix), "application/json"
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
|
||||||
|
def construct_url(base_url: str, path: str) -> str:
    """Build the upstream URL for a PyPI request.

    PyPI splits simple/ index pages (pypi.org) from file downloads
    (files.pythonhosted.org), so simple/ requests are redirected to pypi.org.

    Args:
        base_url: Configured upstream base URL; a trailing slash is tolerated.
        path: Request path relative to the remote.

    Returns:
        The absolute upstream URL to fetch.
    """
    # Normalise once so the host comparison and the joined URL agree; without
    # this a base_url ending in "/" produced "//" in the fall-through URL.
    normalized_base = base_url.rstrip("/")
    if normalized_base == "https://files.pythonhosted.org" and "simple/" in path:
        return f"https://pypi.org/{path}"
    return f"{normalized_base}/{path}"
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    immutable_patterns: list[str],
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Return the payload and content type to serve for a PyPI remote file.

    A path matching any of ``immutable_patterns`` (``re.search``) is passed
    through untouched with a suffix-derived content type. Every other path
    has each occurrence of ``base_url`` replaced with this proxy's
    ``/api/v1/remote/<remote_name>`` prefix and is reported as
    ``text/html; charset=utf-8``.
    """
    is_immutable = any(re.search(pattern, path) for pattern in immutable_patterns)
    if is_immutable:
        return data, get_content_type(filename)

    upstream_prefix = base_url.encode()
    proxied_prefix = f"{proxy_url}/api/v1/remote/{remote_name}".encode()
    return data.replace(upstream_prefix, proxied_prefix), "text/html; charset=utf-8"
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
__all__ = ["get_content_type"]
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .s3 import S3Storage
|
||||||
|
|
||||||
|
__all__ = ["S3Storage"]
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
import os
|
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import os
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
from botocore.config import Config
|
from botocore.config import Config
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
@@ -21,27 +22,25 @@ class S3Storage:
|
|||||||
self.bucket = bucket
|
self.bucket = bucket
|
||||||
self.secure = secure
|
self.secure = secure
|
||||||
|
|
||||||
ca_bundle = os.environ.get('REQUESTS_CA_BUNDLE') or os.environ.get('SSL_CERT_FILE')
|
ca_bundle = os.environ.get("REQUESTS_CA_BUNDLE") or os.environ.get("SSL_CERT_FILE")
|
||||||
config_kwargs = {
|
config_kwargs = {"request_checksum_calculation": "when_required", "response_checksum_validation": "when_required"}
|
||||||
"request_checksum_calculation": "when_required",
|
|
||||||
"response_checksum_validation": "when_required"
|
|
||||||
}
|
|
||||||
client_kwargs = {
|
client_kwargs = {
|
||||||
"endpoint_url": f"http{'s' if self.secure else ''}://{self.endpoint}",
|
"endpoint_url": f"http{'s' if self.secure else ''}://{self.endpoint}",
|
||||||
"aws_access_key_id": self.access_key,
|
"aws_access_key_id": self.access_key,
|
||||||
"aws_secret_access_key": self.secret_key,
|
"aws_secret_access_key": self.secret_key,
|
||||||
"config": Config(**config_kwargs)
|
"config": Config(**config_kwargs),
|
||||||
}
|
}
|
||||||
|
|
||||||
if ca_bundle and os.path.exists(ca_bundle):
|
if ca_bundle and os.path.exists(ca_bundle):
|
||||||
client_kwargs["verify"] = ca_bundle
|
client_kwargs["verify"] = ca_bundle
|
||||||
print(f"Debug: Using CA bundle: {ca_bundle}")
|
print(f"Debug: Using CA bundle: {ca_bundle}")
|
||||||
else:
|
else:
|
||||||
print(f"Debug: No CA bundle found. REQUESTS_CA_BUNDLE={os.environ.get('REQUESTS_CA_BUNDLE')}, SSL_CERT_FILE={os.environ.get('SSL_CERT_FILE')}")
|
print(
|
||||||
|
f"Debug: No CA bundle found. REQUESTS_CA_BUNDLE={os.environ.get('REQUESTS_CA_BUNDLE')}, SSL_CERT_FILE={os.environ.get('SSL_CERT_FILE')}"
|
||||||
|
)
|
||||||
|
|
||||||
self.client = boto3.client("s3", **client_kwargs)
|
self.client = boto3.client("s3", **client_kwargs)
|
||||||
|
|
||||||
# Try to ensure bucket exists, but don't fail if MinIO isn't ready yet
|
|
||||||
try:
|
try:
|
||||||
self._ensure_bucket_exists()
|
self._ensure_bucket_exists()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -55,25 +54,21 @@ class S3Storage:
|
|||||||
self.client.create_bucket(Bucket=self.bucket)
|
self.client.create_bucket(Bucket=self.bucket)
|
||||||
|
|
||||||
def get_object_key(self, remote_name: str, path: str) -> str:
    """Build the storage key for *path* under *remote_name*.

    Leading slashes are ignored. Three key shapes are produced:

    * ``<remote>/blobs/sha256/<digest>`` when the path contains exactly one
      ``/blobs/sha256:`` marker,
    * ``<remote>/<hash>/<filename>`` when the path has a directory part, where
      ``<hash>`` is the first 16 hex characters of the SHA-256 of that part,
    * ``<remote>/<filename>`` otherwise.
    """
    relative = path.lstrip("/")

    # Docker blobs are keyed by digest for deduplication across images
    blob_marker = "/blobs/sha256:"
    if blob_marker in relative:
        pieces = relative.split(blob_marker)
        if len(pieces) == 2:
            return f"{remote_name}/blobs/sha256/{pieces[1]}"

    directory, filename = os.path.split(relative)
    if not directory:
        return f"{remote_name}/{filename}"

    # A truncated digest of the directory keeps keys short while still
    # separating same-named files that live in different directories.
    directory_digest = hashlib.sha256(directory.encode()).hexdigest()[:16]
    return f"{remote_name}/{directory_digest}/{filename}"
|
||||||
|
|
||||||
def exists(self, key: str) -> bool:
|
def exists(self, key: str) -> bool:
|
||||||
@@ -0,0 +1,183 @@
|
|||||||
|
"""
|
||||||
|
Pytest configuration and shared fixtures.
|
||||||
|
|
||||||
|
Module-level setup (env vars + connection patches) runs before any test
|
||||||
|
module is imported, so the FastAPI app initialises against mocks rather
|
||||||
|
than real S3 / Redis / PostgreSQL services.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test remote configuration
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Remote definitions used by the test suite; this mapping is dumped to a
# temporary remotes.yaml further down in this module.
TEST_REMOTES = {
    "remotes": {
        # Alpine remote: x86_64 .apk files are listed as immutable.
        "alpine-test": {
            "base_url": "https://dl-cdn.alpinelinux.org",
            "type": "remote",
            "package": "alpine",
            "immutable_patterns": [".*/x86_64/.*\\.apk$"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
        },
        # RPM remote: x86_64 .rpm files and everything under repodata/ are listed as immutable.
        "rpm-test": {
            "base_url": "https://example.com/rpm",
            "type": "remote",
            "package": "rpm",
            "immutable_patterns": [".*/x86_64/.*\\.rpm$", ".*/repodata/.*$"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
        },
        # Docker remote with no explicit patterns.
        "docker-test": {
            "base_url": "https://registry.example.com",
            "type": "remote",
            "package": "docker",
            "cache": {"immutable_ttl": 0, "mutable_ttl": 300},
        },
        # Docker remote whose only pattern is anchored to library/nginx.
        "docker-restricted": {
            "base_url": "https://registry.example.com",
            "type": "remote",
            "package": "docker",
            "immutable_patterns": ["^library/nginx"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 300},
        },
        # Generic remote: .tar.gz files are listed as immutable.
        "generic-test": {
            "base_url": "https://releases.example.com",
            "type": "remote",
            "package": "generic",
            "immutable_patterns": [".*\\.tar\\.gz$"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 0},
        },
        # Generic remote with a custom mutable pattern (metadata.json).
        "custom-index-test": {
            "base_url": "https://example.com",
            "type": "remote",
            "package": "generic",
            "mutable_patterns": ["metadata\\.json$"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 600},
        },
        # Same as custom-index-test but with check_mutable_updates enabled.
        "check-mutable-test": {
            "base_url": "https://example.com",
            "type": "remote",
            "package": "generic",
            "mutable_patterns": ["metadata\\.json$"],
            "check_mutable_updates": True,
            "cache": {"immutable_ttl": 0, "mutable_ttl": 600},
        },
        # The only entry of type "local"; it has no base_url.
        "local-test": {
            "type": "local",
            "package": "generic",
            "cache": {"immutable_ttl": 0, "mutable_ttl": 0},
        },
        # PyPI remote: wheels, wheel metadata and sdists under packages/ are listed as immutable.
        "pypi-test": {
            "base_url": "https://files.pythonhosted.org",
            "type": "remote",
            "package": "pypi",
            "immutable_patterns": [
                r"packages/.*\.whl$",
                r"packages/.*\.whl\.metadata$",
                r"packages/.*\.tar\.gz$",
            ],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 600},
        },
        # npm remote: .tgz is immutable; the mutable pattern matches every path not ending in .tgz.
        "npm-test": {
            "base_url": "https://registry.npmjs.org",
            "type": "remote",
            "package": "npm",
            "immutable_patterns": [r"\.tgz$"],
            "mutable_patterns": [r"^(?!.*\.tgz$).*"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 600},
        },
        # Helm remote: .tgz files are listed as immutable.
        "helm-test": {
            "base_url": "https://helm.releases.hashicorp.com",
            "type": "remote",
            "package": "helm",
            "immutable_patterns": [r"\.tgz$"],
            "cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
        },
        # Generic remote with quarantine_new enabled and quarantine_days set to 3.
        "quarantine-test": {
            "base_url": "https://releases.example.com",
            "type": "remote",
            "package": "generic",
            "immutable_patterns": [r".*\.tar\.gz$"],
            "quarantine_new": True,
            "quarantine_days": 3,
            "cache": {"immutable_ttl": 0, "mutable_ttl": 0},
        },
        # Identical to quarantine-test except quarantine_new is False.
        "quarantine-disabled": {
            "base_url": "https://releases.example.com",
            "type": "remote",
            "package": "generic",
            "immutable_patterns": [r".*\.tar\.gz$"],
            "quarantine_new": False,
            "quarantine_days": 3,
            "cache": {"immutable_ttl": 0, "mutable_ttl": 0},
        },
    }
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Write temp config and set env vars BEFORE importing the package
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Serialise TEST_REMOTES to remotes.yaml inside a fresh temporary directory.
_tmpdir = tempfile.mkdtemp()
_config_path = os.path.join(_tmpdir, "remotes.yaml")
with open(_config_path, "w") as _f:
    yaml.dump(TEST_REMOTES, _f)

# Export the temp config path plus placeholder storage / Redis / database
# settings through the process environment.
os.environ.update(
    {
        "CONFIG_PATH": _config_path,
        "MINIO_ENDPOINT": "localhost:9000",
        "MINIO_ACCESS_KEY": "testkey",
        "MINIO_SECRET_KEY": "testsecret",
        "MINIO_BUCKET": "testbucket",
        "REDIS_URL": "redis://localhost:6379/0",
        "DBHOST": "localhost",
        "DBPORT": "5432",
        "DBUSER": "test",
        "DBPASS": "test",
        "DBNAME": "test",
    }
)

# Patch external service connections before the package is imported.
# These stay active for the whole session (process exits after tests finish).
_boto3_patch = patch("boto3.client", return_value=MagicMock())
_redis_patch = patch("redis.from_url", return_value=MagicMock())
_psycopg2_patch = patch("psycopg2.connect", return_value=MagicMock())
# The patchers are started here and never stopped.
_boto3_patch.start()
_redis_patch.start()
_psycopg2_patch.start()
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Shared fixtures
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
import pytest # noqa: E402
|
||||||
|
from fastapi.testclient import TestClient # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def app():
    """Session-scoped FastAPI application object.

    The import happens inside the fixture so it runs after this module's
    environment setup and connection patches.
    """
    from artifactapi.main import app as application

    return application
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def client(app):
    """Session-scoped ``TestClient`` wrapping the ``app`` fixture."""
    test_client = TestClient(app)
    return test_client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def config_path():
    """Path of the temporary remotes.yaml written at import time."""
    path = _config_path
    return path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def test_remotes():
    """The module-level ``TEST_REMOTES`` mapping (the same object, not a copy)."""
    remotes = TEST_REMOTES
    return remotes
|
||||||
@@ -0,0 +1,329 @@
|
|||||||
|
"""Tests for RedisCache, focusing on is_mutable_file with configurable patterns."""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from unittest.mock import ANY, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from artifactapi.cache import RedisCache
|
||||||
|
from artifactapi.config import _PACKAGE_MUTABLE_PATTERNS
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def bare_cache():
    """RedisCache allocated without running __init__, for pure-logic tests that need no Redis."""
    cache_cls = RedisCache
    return cache_cls.__new__(cache_cls)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def unavailable_cache():
    """RedisCache constructed while ``redis.from_url`` raises, i.e. Redis is not reachable."""
    failing_connect = patch("redis.from_url", side_effect=Exception("connection refused"))
    with failing_connect:
        cache = RedisCache("redis://localhost:6379/0")
        return cache
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_redis_client():
    """Fresh ``MagicMock`` standing in for a Redis client."""
    client_double = MagicMock()
    return client_double
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def cache_with_redis(mock_redis_client):
    """RedisCache whose client is the ``mock_redis_client`` MagicMock and which reports itself available."""
    with patch("redis.from_url", return_value=mock_redis_client):
        cache = RedisCache("redis://localhost:6379/0")

    # Set both attributes explicitly so the fixture does not rely on what
    # __init__ stored.
    cache.client, cache.available = mock_redis_client, True
    return cache
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# is_mutable_file — alpine patterns
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsMutableFileAlpine:
    """``is_mutable_file`` evaluated against the packaged "alpine" mutable patterns."""

    @staticmethod
    def _is_mutable(cache, path):
        """Run ``is_mutable_file`` for *path* with the alpine pattern list."""
        alpine_patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"]
        return cache.is_mutable_file(path, alpine_patterns)

    def test_apkindex_tarball_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "alpine/v3.18/x86_64/APKINDEX.tar.gz")

    def test_nested_apkindex_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "mirrors/dl-cdn/alpine/v3.19/community/x86_64/APKINDEX.tar.gz")

    def test_apk_package_is_not_index(self, bare_cache):
        assert not self._is_mutable(bare_cache, "alpine/v3.18/x86_64/musl-1.2.4-r2.apk")

    def test_random_tarball_is_not_index(self, bare_cache):
        assert not self._is_mutable(bare_cache, "some/path/archive.tar.gz")

    def test_apkindex_signature_file_is_not_index(self, bare_cache):
        # A signature file sitting next to the index must not count as the index itself.
        assert not self._is_mutable(bare_cache, "alpine/v3.18/x86_64/APKINDEX.tar.gz.sig")

    def test_apkindex_tmp_file_is_not_index(self, bare_cache):
        assert not self._is_mutable(bare_cache, "alpine/v3.18/x86_64/APKINDEX.tar.gz.tmp")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# is_mutable_file — rpm patterns
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsMutableFileRpm:
    """``is_mutable_file`` evaluated against the packaged "rpm" mutable patterns."""

    @staticmethod
    def _is_mutable(cache, path):
        """Run ``is_mutable_file`` for *path* with the rpm pattern list."""
        rpm_patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"]
        return cache.is_mutable_file(path, rpm_patterns)

    def test_repomd_xml_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "almalinux/9/x86_64/repomd.xml")

    def test_repodata_primary_xml_gz_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "repo/repodata/primary.xml.gz")

    def test_repodata_sqlite_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "repo/repodata/primary.sqlite")

    def test_repodata_sqlite_bz2_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "repo/repodata/other.sqlite.bz2")

    def test_repodata_yaml_xz_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "repo/repodata/comps.yaml.xz")

    def test_packages_gz_pattern_matches_any_path(self, bare_cache):
        # The Packages.gz$ regex is inherited from the original hardcoded logic and
        # intentionally matches any path ending in Packages.gz, Debian-style paths
        # included. This test pins that deliberate behaviour.
        assert self._is_mutable(bare_cache, "debian/dists/stable/main/binary-amd64/Packages.gz")

    def test_rpm_package_is_not_index(self, bare_cache):
        assert not self._is_mutable(bare_cache, "almalinux/9/x86_64/Packages/bash-5.1.8.x86_64.rpm")

    def test_arbitrary_xml_outside_repodata_is_not_index(self, bare_cache):
        assert not self._is_mutable(bare_cache, "some/path/config.xml")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# is_mutable_file — docker patterns
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsMutableFileDocker:
    """``is_mutable_file`` evaluated against the packaged "docker" mutable patterns."""

    @staticmethod
    def _is_mutable(cache, path):
        """Run ``is_mutable_file`` for *path* with the docker pattern list."""
        docker_patterns = _PACKAGE_MUTABLE_PATTERNS["docker"]
        return cache.is_mutable_file(path, docker_patterns)

    def test_tag_manifest_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "library/nginx/manifests/latest")

    def test_version_tag_manifest_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "library/nginx/manifests/1.25.3")

    def test_hyphenated_tag_manifest_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "library/nginx/manifests/latest-rc")

    def test_numeric_date_tag_manifest_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "library/nginx/manifests/20240101")

    def test_digest_manifest_is_not_index(self, bare_cache):
        # Manifest addressed by a full-length sha256 digest rather than a tag.
        digest = "sha256:" + "a" * 64
        assert not self._is_mutable(bare_cache, f"library/nginx/manifests/{digest}")

    def test_tags_list_is_index(self, bare_cache):
        assert self._is_mutable(bare_cache, "library/nginx/tags/list")

    def test_blob_is_not_index(self, bare_cache):
        assert not self._is_mutable(bare_cache, "library/nginx/blobs/sha256:abc123")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# is_mutable_file — edge cases
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsMutableFileEdgeCases:
    """Edge cases for ``is_mutable_file``: empty, ``None`` and caller-supplied pattern lists."""

    def test_empty_patterns_nothing_is_index(self, bare_cache):
        """With an empty pattern list no path is reported as mutable."""
        candidates = (
            "APKINDEX.tar.gz",
            "repomd.xml",
            "library/nginx/manifests/latest",
        )
        for candidate in candidates:
            assert not bare_cache.is_mutable_file(candidate, [])

    def test_none_patterns_nothing_is_index(self, bare_cache):
        """With ``None`` as the pattern list no path is reported as mutable."""
        for candidate in ("APKINDEX.tar.gz", "repomd.xml"):
            assert not bare_cache.is_mutable_file(candidate, None)

    def test_custom_patterns_match(self, bare_cache):
        """Caller-supplied patterns match their targets and nothing else."""
        custom = [r"metadata\.json$", r"index\.yaml$"]
        for matching_path in ("repo/metadata.json", "repo/subdir/index.yaml"):
            assert bare_cache.is_mutable_file(matching_path, custom)
        assert not bare_cache.is_mutable_file("repo/data.tar.gz", custom)

    def test_custom_pattern_does_not_match_standard_index(self, bare_cache):
        """Only the patterns passed in are consulted; an APKINDEX path does not match here."""
        custom = [r"metadata\.json$"]
        verdict = bare_cache.is_mutable_file("APKINDEX.tar.gz", custom)
        assert not verdict
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_index_cache_key
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetIndexCacheKey:
    """Shape and uniqueness checks for keys built by ``get_index_cache_key``."""

    def test_key_format_is_deterministic(self, bare_cache):
        """The key is ``index:<remote>:<first 16 hex chars of sha256(path)>``."""
        # One equality check pins the hash algorithm, the truncation length
        # and the overall key layout at the same time.
        index_path = "alpine/v3.18/x86_64/APKINDEX.tar.gz"
        digest_prefix = hashlib.sha256(index_path.encode()).hexdigest()[:16]
        wanted = f"index:alpine-test:{digest_prefix}"
        assert bare_cache.get_index_cache_key("alpine-test", index_path) == wanted

    def test_different_paths_produce_different_keys(self, bare_cache):
        """Two paths under the same remote yield distinct keys."""
        key_318 = bare_cache.get_index_cache_key("alpine-test", "alpine/v3.18/x86_64/APKINDEX.tar.gz")
        key_319 = bare_cache.get_index_cache_key("alpine-test", "alpine/v3.19/x86_64/APKINDEX.tar.gz")
        assert key_318 != key_319

    def test_different_remotes_produce_different_keys(self, bare_cache):
        """The same path under two remotes yields distinct keys."""
        key_a = bare_cache.get_index_cache_key("remote-a", "path/to/APKINDEX.tar.gz")
        key_b = bare_cache.get_index_cache_key("remote-b", "path/to/APKINDEX.tar.gz")
        assert key_a != key_b

    def test_key_starts_with_index_prefix_and_remote(self, bare_cache):
        """The key begins with the ``index:`` namespace followed by the remote name."""
        built = bare_cache.get_index_cache_key("myremote", "some/path")
        assert built.startswith("index:myremote:")

    def test_key_hash_segment_is_16_chars(self, bare_cache):
        """The key has three colon-separated segments and a 16-character hash."""
        built = bare_cache.get_index_cache_key("myremote", "some/path/file.xml")
        # Layout is index:<remote>:<16-char hash>; the fixed hash width is part
        # of the contract being tested.
        segments = built.split(":")
        assert len(segments) == 3
        assert len(segments[2]) == 16
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# mark_index_cached / is_index_valid
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestIndexValidity:
    """``mark_index_cached`` / ``is_index_valid`` against a mocked or absent Redis client."""

    def test_mark_index_cached_calls_setex_with_correct_ttl(self, cache_with_redis, mock_redis_client):
        """Marking an index issues exactly one ``setex`` with the index key and the given TTL."""
        remote_name, index_path, ttl_seconds = "remote", "path/APKINDEX.tar.gz", 300
        cache_with_redis.mark_index_cached(remote_name, index_path, ttl_seconds)
        wanted_key = cache_with_redis.get_index_cache_key(remote_name, index_path)
        # The stored value is not part of this contract, hence ANY.
        mock_redis_client.setex.assert_called_once_with(wanted_key, ttl_seconds, ANY)

    def test_present_key_is_valid(self, cache_with_redis, mock_redis_client):
        """When ``exists`` returns 1 the index is considered valid."""
        mock_redis_client.exists.return_value = 1
        verdict = cache_with_redis.is_index_valid("remote", "path/APKINDEX.tar.gz")
        assert verdict

    def test_missing_key_is_not_valid(self, cache_with_redis, mock_redis_client):
        """When ``exists`` returns 0 the index is not considered valid."""
        mock_redis_client.exists.return_value = 0
        verdict = cache_with_redis.is_index_valid("remote", "path/APKINDEX.tar.gz")
        assert not verdict

    def test_unavailable_redis_is_not_valid(self, unavailable_cache):
        """Without Redis nothing is reported as valid."""
        verdict = unavailable_cache.is_index_valid("remote", "some/path")
        assert not verdict

    def test_mark_cached_no_op_when_unavailable(self, unavailable_cache):
        """Marking an index without Redis completes without raising."""
        # With no client there is nothing ``setex`` could be called on.
        assert unavailable_cache.client is None
        # Passing means the call below returned normally.
        unavailable_cache.mark_index_cached("remote", "some/path", 300)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# mutable meta (ETag / Last-Modified storage)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestMutableMeta:
    """Storage, retrieval and deletion of ETag / Last-Modified metadata for mutable files."""

    def test_meta_key_format(self, bare_cache):
        """The key is ``mutable:meta:<remote>:<first 16 hex chars of sha256(path)>``."""
        meta_path = "repo/metadata.json"
        digest_prefix = hashlib.sha256(meta_path.encode()).hexdigest()[:16]
        wanted = f"mutable:meta:myremote:{digest_prefix}"
        assert bare_cache.get_mutable_meta_key("myremote", meta_path) == wanted

    def test_meta_key_hash_is_16_chars(self, bare_cache):
        """The last colon-separated segment of the key is 16 characters long."""
        built = bare_cache.get_mutable_meta_key("remote", "some/path/file.json")
        hash_segment = built.split(":")[-1]
        assert len(hash_segment) == 16

    def test_store_and_retrieve_etag(self, cache_with_redis, mock_redis_client):
        """Storing an ETag calls ``hset`` once; reading returns what ``hgetall`` yields."""
        etag_value = '"abc123"'
        mock_redis_client.hgetall.return_value = {"etag": '"abc123"'}
        cache_with_redis.store_mutable_meta("remote", "path/meta.json", etag_value, None)
        mock_redis_client.hset.assert_called_once()
        fetched = cache_with_redis.get_mutable_meta("remote", "path/meta.json")
        assert fetched["etag"] == '"abc123"'

    def test_store_and_retrieve_last_modified(self, cache_with_redis, mock_redis_client):
        """A Last-Modified value comes back under the ``last_modified`` key."""
        stamp = "Mon, 01 Jan 2024 00:00:00 GMT"
        mock_redis_client.hgetall.return_value = {"last_modified": stamp}
        cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, stamp)
        fetched = cache_with_redis.get_mutable_meta("remote", "path/meta.json")
        assert fetched["last_modified"] == stamp

    def test_store_no_op_when_both_none(self, cache_with_redis, mock_redis_client):
        """With neither an ETag nor a Last-Modified value, ``hset`` is never called."""
        cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, None)
        mock_redis_client.hset.assert_not_called()

    def test_store_no_op_when_unavailable(self, unavailable_cache):
        """Storing metadata without Redis completes without raising."""
        # Passing means the call returned normally.
        unavailable_cache.store_mutable_meta("remote", "path", "etag", None)

    def test_get_returns_empty_when_unavailable(self, unavailable_cache):
        """Reading metadata without Redis yields an empty dict."""
        fetched = unavailable_cache.get_mutable_meta("remote", "path")
        assert fetched == {}

    def test_delete_removes_meta_key(self, cache_with_redis, mock_redis_client):
        """Deleting metadata issues exactly one ``delete`` for the metadata key."""
        wanted_key = cache_with_redis.get_mutable_meta_key("remote", "path/meta.json")
        cache_with_redis.delete_mutable_meta("remote", "path/meta.json")
        mock_redis_client.delete.assert_called_once_with(wanted_key)

    def test_delete_no_op_when_unavailable(self, unavailable_cache):
        """Deleting metadata without Redis completes without raising."""
        # Passing means the call returned normally.
        unavailable_cache.delete_mutable_meta("remote", "path")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# artifact published date (quarantine support)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestArtifactPublished:
    """Key layout and get/store behaviour for the artifact published-date records."""

    def test_key_format_is_deterministic(self, bare_cache):
        """The key is ``pkg:published:<remote>:<first 16 hex chars of sha256(path)>``."""
        artifact_path = "some/path/package-1.0.tar.gz"
        digest_prefix = hashlib.sha256(artifact_path.encode()).hexdigest()[:16]
        wanted = f"pkg:published:myremote:{digest_prefix}"
        assert bare_cache.get_artifact_published_key("myremote", artifact_path) == wanted

    def test_key_hash_is_16_chars(self, bare_cache):
        """The last colon-separated segment of the key is 16 characters long."""
        built = bare_cache.get_artifact_published_key("remote", "path/to/file.whl")
        hash_segment = built.split(":")[-1]
        assert len(hash_segment) == 16

    def test_different_paths_produce_different_keys(self, bare_cache):
        """Two artifact paths under the same remote yield distinct keys."""
        key_v1 = bare_cache.get_artifact_published_key("remote", "pkg-1.0.tar.gz")
        key_v2 = bare_cache.get_artifact_published_key("remote", "pkg-2.0.tar.gz")
        assert key_v1 != key_v2

    def test_store_calls_set_with_correct_value(self, cache_with_redis, mock_redis_client):
        """Storing a date issues exactly one ``set`` with the published key and the value."""
        stamp = "Mon, 01 Jan 2024 00:00:00 GMT"
        cache_with_redis.store_artifact_published("remote", "path/pkg.tar.gz", stamp)
        wanted_key = cache_with_redis.get_artifact_published_key("remote", "path/pkg.tar.gz")
        mock_redis_client.set.assert_called_once_with(wanted_key, stamp)

    def test_get_returns_stored_value(self, cache_with_redis, mock_redis_client):
        """The getter hands back whatever the client's ``get`` returns."""
        stamp = "Tue, 15 Mar 2022 12:00:00 GMT"
        mock_redis_client.get.return_value = stamp
        fetched = cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz")
        assert fetched == stamp

    def test_get_returns_none_when_not_stored(self, cache_with_redis, mock_redis_client):
        """A missing record comes back as ``None``."""
        mock_redis_client.get.return_value = None
        fetched = cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz")
        assert fetched is None

    def test_store_no_op_when_unavailable(self, unavailable_cache):
        """Storing a date without Redis completes without raising."""
        stamp = "Mon, 01 Jan 2024 00:00:00 GMT"
        unavailable_cache.store_artifact_published("remote", "path", stamp)

    def test_get_returns_none_when_unavailable(self, unavailable_cache):
        """Reading a date without Redis yields ``None``."""
        fetched = unavailable_cache.get_artifact_published("remote", "path")
        assert fetched is None
|
||||||
@@ -0,0 +1,540 @@
|
|||||||
|
"""Tests for ConfigManager, focusing on get_mutable_patterns and get_immutable_patterns."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from artifactapi.config import ConfigManager
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def make_config(tmp_path):
    """Return a factory that builds a ConfigManager from a dict of remotes.

    The factory dumps ``{"remotes": <dict>}`` as YAML into ``remotes.yaml``
    under ``tmp_path`` and constructs the ConfigManager from that file's path.
    """

    def _build(remotes_dict):
        yaml_text = yaml.dump({"remotes": remotes_dict})
        target = tmp_path / "remotes.yaml"
        target.write_text(yaml_text)
        return ConfigManager(str(target))

    return _build
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_mutable_patterns
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetMutablePatterns:
    """``get_mutable_patterns``: package defaults, user extras, and how the two combine."""

    def test_alpine_returns_package_defaults(self, make_config):
        """An alpine remote includes the APKINDEX pattern."""
        remote_cfg = {"type": "remote", "package": "alpine", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        found = manager.get_mutable_patterns("r")
        assert r"APKINDEX\.tar\.gz$" in found

    def test_rpm_returns_package_defaults(self, make_config):
        """An rpm remote includes the repomd pattern and at least one repodata pattern."""
        remote_cfg = {"type": "remote", "package": "rpm", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        found = manager.get_mutable_patterns("r")
        assert r"repomd\.xml$" in found
        assert any("repodata" in pattern for pattern in found)

    def test_docker_returns_package_defaults(self, make_config):
        """A docker remote includes patterns mentioning manifests and tags/list."""
        remote_cfg = {"type": "remote", "package": "docker", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        found = manager.get_mutable_patterns("r")
        assert any("manifests" in pattern for pattern in found)
        assert any("tags/list" in pattern for pattern in found)

    def test_generic_returns_empty_list(self, make_config):
        """A generic remote has no mutable patterns."""
        remote_cfg = {"type": "remote", "package": "generic", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        assert manager.get_mutable_patterns("r") == []

    def test_unknown_remote_returns_empty_list(self, make_config):
        """Asking about a remote that is not configured yields an empty list."""
        manager = make_config({})
        assert manager.get_mutable_patterns("nonexistent") == []

    def test_missing_package_field_defaults_to_generic(self, make_config):
        """A remote without a ``package`` key yields an empty list."""
        remote_cfg = {"type": "remote", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        assert manager.get_mutable_patterns("r") == []

    def test_unknown_package_type_returns_empty_list(self, make_config):
        """An unrecognised package type yields an empty list."""
        # Known footgun: an unrecognised (e.g. mis-spelled) package type
        # produces [] without any error.
        remote_cfg = {"type": "remote", "package": "deb", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        assert manager.get_mutable_patterns("r") == []

    def test_extra_patterns_appended_after_defaults(self, make_config):
        """User patterns are present and positioned after the package default."""
        default_pattern = r"APKINDEX\.tar\.gz$"
        extra_pattern = r"custom\.json$"
        remote_cfg = {
            "type": "remote",
            "package": "alpine",
            "base_url": "https://x.com",
            "mutable_patterns": [r"custom\.json$"],
        }
        manager = make_config({"r": remote_cfg})
        found = manager.get_mutable_patterns("r")
        assert default_pattern in found
        assert extra_pattern in found
        # The package default must sit ahead of the user-supplied entry.
        assert found.index(default_pattern) < found.index(extra_pattern)

    def test_explicit_empty_extra_patterns_returns_defaults(self, make_config):
        """An explicit empty ``mutable_patterns`` list still leaves the default in place."""
        remote_cfg = {
            "type": "remote",
            "package": "alpine",
            "base_url": "https://x.com",
            "mutable_patterns": [],
        }
        manager = make_config({"r": remote_cfg})
        assert r"APKINDEX\.tar\.gz$" in manager.get_mutable_patterns("r")

    def test_duplicate_extra_pattern_not_added_twice(self, make_config):
        """A user pattern equal to a default appears exactly once in the result."""
        existing = r"APKINDEX\.tar\.gz$"
        remote_cfg = {
            "type": "remote",
            "package": "alpine",
            "base_url": "https://x.com",
            "mutable_patterns": [existing],
        }
        manager = make_config({"r": remote_cfg})
        found = manager.get_mutable_patterns("r")
        assert found.count(existing) == 1

    def test_generic_with_only_extra_patterns(self, make_config):
        """For a generic remote the result is exactly the user patterns, in order."""
        remote_cfg = {
            "type": "remote",
            "package": "generic",
            "base_url": "https://x.com",
            "mutable_patterns": [r"meta\.json$", r"index\.yaml$"],
        }
        manager = make_config({"r": remote_cfg})
        assert manager.get_mutable_patterns("r") == [r"meta\.json$", r"index\.yaml$"]

    def test_rpm_extra_patterns_merged(self, make_config):
        """For an rpm remote both the default and the user pattern are present."""
        remote_cfg = {
            "type": "remote",
            "package": "rpm",
            "base_url": "https://x.com",
            "mutable_patterns": [r"custom-meta\.xml$"],
        }
        manager = make_config({"r": remote_cfg})
        found = manager.get_mutable_patterns("r")
        assert r"repomd\.xml$" in found
        assert r"custom-meta\.xml$" in found

    def test_npm_has_no_package_defaults(self, make_config):
        """An npm remote without user patterns yields an empty list."""
        remote_cfg = {"type": "remote", "package": "npm", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        assert manager.get_mutable_patterns("r") == []

    def test_npm_explicit_mutable_pattern_matches_metadata(self, make_config):
        """The "anything but .tgz" user pattern matches plain and scoped package names."""
        import re

        remote_cfg = {
            "type": "remote",
            "package": "npm",
            "base_url": "https://x.com",
            "mutable_patterns": [r"^(?!.*\.tgz$).*"],
        }
        manager = make_config({"r": remote_cfg})
        found = manager.get_mutable_patterns("r")
        assert any(re.search(pattern, "express") for pattern in found)
        assert any(re.search(pattern, "@babel/core") for pattern in found)

    def test_helm_returns_index_yaml_as_mutable(self, make_config):
        """A helm remote includes the index.yaml pattern."""
        remote_cfg = {"type": "remote", "package": "helm", "base_url": "https://helm.example.com"}
        manager = make_config({"r": remote_cfg})
        found = manager.get_mutable_patterns("r")
        assert r"index\.yaml$" in found

    def test_helm_chart_tarballs_not_mutable_by_default(self, make_config):
        """No helm default pattern matches a chart tarball name."""
        import re

        remote_cfg = {"type": "remote", "package": "helm", "base_url": "https://helm.example.com"}
        manager = make_config({"r": remote_cfg})
        found = manager.get_mutable_patterns("r")
        # index.yaml is the mutable file; .tgz chart archives must not match.
        assert not any(re.search(pattern, "vault-0.29.1.tgz") for pattern in found)
        assert not any(re.search(pattern, "consul-1.5.0.tgz") for pattern in found)

    def test_npm_explicit_mutable_pattern_excludes_tarballs(self, make_config):
        """The "anything but .tgz" user pattern does not match tarball paths."""
        import re

        remote_cfg = {
            "type": "remote",
            "package": "npm",
            "base_url": "https://x.com",
            "mutable_patterns": [r"^(?!.*\.tgz$).*"],
        }
        manager = make_config({"r": remote_cfg})
        found = manager.get_mutable_patterns("r")
        assert not any(re.search(pattern, "express-4.18.2.tgz") for pattern in found)
        assert not any(re.search(pattern, "express/-/express-4.18.2.tgz") for pattern in found)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_immutable_patterns
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetImmutablePatterns:
    """``get_immutable_patterns``: remote-level values and per-repository overrides."""

    def test_returns_immutable_patterns(self, make_config):
        """The configured list is returned unchanged."""
        remote_cfg = {
            "type": "remote",
            "package": "generic",
            "base_url": "https://x.com",
            "immutable_patterns": [r".*\.tar\.gz$"],
        }
        manager = make_config({"r": remote_cfg})
        assert manager.get_immutable_patterns("r") == [r".*\.tar\.gz$"]

    def test_returns_empty_for_missing_remote(self, make_config):
        """Asking about a remote that is not configured yields an empty list."""
        manager = make_config({})
        assert manager.get_immutable_patterns("nonexistent") == []

    def test_returns_empty_when_no_patterns_configured(self, make_config):
        """A remote without an ``immutable_patterns`` key yields an empty list."""
        remote_cfg = {"type": "remote", "package": "generic", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        assert manager.get_immutable_patterns("r") == []

    def test_multiple_patterns_returned(self, make_config):
        """Several configured patterns come back in the configured order."""
        patterns = [r".*\.rpm$", r".*/repodata/.*$"]
        remote_cfg = {
            "type": "remote",
            "package": "rpm",
            "base_url": "https://x.com",
            "immutable_patterns": patterns,
        }
        manager = make_config({"r": remote_cfg})
        assert manager.get_immutable_patterns("r") == patterns

    def test_dict_keyed_repositories_returns_per_repo_patterns(self, make_config):
        """A path listed under ``repositories`` gets that repository's own patterns."""
        remote_cfg = {
            "type": "remote",
            "package": "generic",
            "base_url": "https://x.com",
            "immutable_patterns": [r".*\.tar\.gz$"],
            "repositories": {
                "/path/to/repo": {"immutable_patterns": [r".*\.rpm$"]},
            },
        }
        manager = make_config({"r": remote_cfg})
        assert manager.get_immutable_patterns("r", "/path/to/repo") == [r".*\.rpm$"]

    def test_dict_keyed_repositories_falls_back_to_remote_patterns(self, make_config):
        """A path not listed under ``repositories`` gets the remote-level patterns."""
        remote_cfg = {
            "type": "remote",
            "package": "generic",
            "base_url": "https://x.com",
            "immutable_patterns": [r".*\.tar\.gz$"],
            "repositories": {
                "/path/to/repo": {"immutable_patterns": [r".*\.rpm$"]},
            },
        }
        manager = make_config({"r": remote_cfg})
        assert manager.get_immutable_patterns("r", "/unknown/path") == [r".*\.tar\.gz$"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_user_mutable_patterns
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetUserMutablePatterns:
    """``get_user_mutable_patterns`` reports only what the user configured."""

    def test_returns_only_user_patterns(self, make_config):
        """For an alpine remote with one extra, only that extra is returned."""
        remote_cfg = {
            "type": "remote",
            "package": "alpine",
            "base_url": "https://x.com",
            "mutable_patterns": [r"custom\.json$"],
        }
        manager = make_config({"r": remote_cfg})
        assert manager.get_user_mutable_patterns("r") == [r"custom\.json$"]

    def test_excludes_package_defaults(self, make_config):
        """An alpine remote with no extras yields an empty list."""
        # Built-in package patterns such as APKINDEX must not leak into this result.
        remote_cfg = {"type": "remote", "package": "alpine", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        assert manager.get_user_mutable_patterns("r") == []

    def test_returns_empty_for_missing_remote(self, make_config):
        """Asking about a remote that is not configured yields an empty list."""
        manager = make_config({})
        assert manager.get_user_mutable_patterns("nonexistent") == []

    def test_returns_empty_when_key_absent(self, make_config):
        """A remote without a ``mutable_patterns`` key yields an empty list."""
        remote_cfg = {"type": "remote", "package": "generic", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        assert manager.get_user_mutable_patterns("r") == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_cache_config
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetCacheConfig:
    """``get_cache_config`` returns a remote's ``cache`` mapping or an empty dict."""

    def test_returns_cache_section(self, make_config):
        """The configured ``cache`` mapping is returned as-is."""
        remote_cfg = {
            "type": "remote",
            "package": "generic",
            "base_url": "https://x.com",
            "cache": {"immutable_ttl": 0, "mutable_ttl": 7200},
        }
        manager = make_config({"r": remote_cfg})
        assert manager.get_cache_config("r") == {"immutable_ttl": 0, "mutable_ttl": 7200}

    def test_returns_empty_dict_for_missing_remote(self, make_config):
        """Asking about a remote that is not configured yields an empty dict."""
        manager = make_config({})
        assert manager.get_cache_config("nonexistent") == {}

    def test_returns_empty_dict_when_no_cache_key(self, make_config):
        """A remote without a ``cache`` key yields an empty dict."""
        remote_cfg = {"type": "remote", "package": "generic", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        assert manager.get_cache_config("r") == {}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Config file reload
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestConfigReload:
    """Single-file mode: ``_check_reload`` reacts to the config file's modification time."""

    def test_reloads_when_file_mtime_advances(self, tmp_path):
        """Rewriting the file and advancing its mtime makes the new content visible."""
        first_doc = {"remotes": {"repo-a": {"type": "remote", "package": "generic", "base_url": "https://x.com"}}}
        second_doc = {"remotes": {"repo-b": {"type": "remote", "package": "generic", "base_url": "https://y.com"}}}

        config_path = tmp_path / "remotes.yaml"
        config_path.write_text(yaml.dump(first_doc))
        manager = ConfigManager(str(config_path))
        assert "repo-a" in manager.config["remotes"]

        config_path.write_text(yaml.dump(second_doc))
        # Set the mtime explicitly past the recorded one so the test does not
        # depend on filesystem timestamp resolution.
        bumped = manager._last_modified + 1
        os.utime(str(config_path), (bumped, bumped))

        manager._check_reload()

        assert "repo-b" in manager.config["remotes"]
        assert "repo-a" not in manager.config["remotes"]

    def test_no_reload_when_file_unchanged(self, tmp_path):
        """Calling ``_check_reload`` on an untouched file leaves the remote in place."""
        only_doc = {"remotes": {"repo-a": {"type": "remote", "package": "generic", "base_url": "https://x.com"}}}

        config_path = tmp_path / "remotes.yaml"
        config_path.write_text(yaml.dump(only_doc))
        manager = ConfigManager(str(config_path))

        # The file is deliberately left alone between construction and this call.
        manager._check_reload()

        assert "repo-a" in manager.config["remotes"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_quarantine_config
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetQuarantineConfig:
    """``get_quarantine_config`` yields an ``(enabled, days)`` pair for a remote."""

    def test_returns_false_zero_when_not_configured(self, make_config):
        """A remote without quarantine keys yields ``(False, 0)``."""
        remote_cfg = {"type": "remote", "package": "generic", "base_url": "https://x.com"}
        manager = make_config({"r": remote_cfg})
        is_enabled, day_count = manager.get_quarantine_config("r")
        assert is_enabled is False
        assert day_count == 0

    def test_returns_false_zero_for_missing_remote(self, make_config):
        """A remote that is not configured yields ``(False, 0)``."""
        manager = make_config({})
        is_enabled, day_count = manager.get_quarantine_config("nonexistent")
        assert is_enabled is False
        assert day_count == 0

    def test_enabled_true_and_days_returned(self, make_config):
        """``quarantine_new: True`` with 7 days yields ``(True, 7)``."""
        remote_cfg = {
            "type": "remote",
            "package": "generic",
            "base_url": "https://x.com",
            "quarantine_new": True,
            "quarantine_days": 7,
        }
        manager = make_config({"r": remote_cfg})
        is_enabled, day_count = manager.get_quarantine_config("r")
        assert is_enabled is True
        assert day_count == 7

    def test_quarantine_new_false_returns_disabled(self, make_config):
        """``quarantine_new: False`` yields a disabled flag while days are still reported."""
        remote_cfg = {
            "type": "remote",
            "package": "generic",
            "base_url": "https://x.com",
            "quarantine_new": False,
            "quarantine_days": 7,
        }
        manager = make_config({"r": remote_cfg})
        is_enabled, day_count = manager.get_quarantine_config("r")
        assert is_enabled is False
        assert day_count == 7

    def test_enabled_with_zero_days_returns_zero(self, make_config):
        """``quarantine_new: True`` with 0 days yields ``(True, 0)``."""
        remote_cfg = {
            "type": "remote",
            "package": "generic",
            "base_url": "https://x.com",
            "quarantine_new": True,
            "quarantine_days": 0,
        }
        manager = make_config({"r": remote_cfg})
        is_enabled, day_count = manager.get_quarantine_config("r")
        assert is_enabled is True
        assert day_count == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Directory mode (CONFIG_PATH points to a directory)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _remote(base_url: str = "https://x.com") -> dict:
|
||||||
|
return {"type": "remote", "package": "generic", "base_url": base_url}
|
||||||
|
|
||||||
|
|
||||||
|
class TestConfigDirMode:
    """ConfigManager constructed with a directory path instead of a single file."""

    def test_loads_all_yaml_files(self, tmp_path):
        """Remotes from every YAML file in the directory are present."""
        file_a = tmp_path / "a.yaml"
        file_a.write_text(yaml.dump({"remotes": {"repo-a": _remote()}}))
        file_b = tmp_path / "b.yaml"
        file_b.write_text(yaml.dump({"remotes": {"repo-b": _remote("https://y.com")}}))
        manager = ConfigManager(str(tmp_path))
        assert "repo-a" in manager.config["remotes"]
        assert "repo-b" in manager.config["remotes"]

    def test_later_file_overrides_earlier_on_same_key(self, tmp_path):
        """When a.yaml and b.yaml define the same remote, b.yaml's value is the one kept."""
        file_a = tmp_path / "a.yaml"
        file_a.write_text(yaml.dump({"remotes": {"r": _remote("https://first.com")}}))
        file_b = tmp_path / "b.yaml"
        file_b.write_text(yaml.dump({"remotes": {"r": _remote("https://second.com")}}))
        manager = ConfigManager(str(tmp_path))
        assert manager.config["remotes"]["r"]["base_url"] == "https://second.com"

    def test_empty_directory_returns_empty_remotes(self, tmp_path):
        """An empty directory produces a config holding an empty ``remotes`` mapping."""
        manager = ConfigManager(str(tmp_path))
        assert manager.config == {"remotes": {}}

    def test_ignores_non_yaml_files(self, tmp_path):
        """A ``.txt`` file in the directory contributes nothing to the remotes."""
        stray = tmp_path / "notes.txt"
        stray.write_text("not yaml")
        file_a = tmp_path / "a.yaml"
        file_a.write_text(yaml.dump({"remotes": {"repo-a": _remote()}}))
        manager = ConfigManager(str(tmp_path))
        assert list(manager.config["remotes"].keys()) == ["repo-a"]

    def test_reload_picks_up_new_file(self, tmp_path):
        """A file added after construction shows up once ``_check_reload`` runs."""
        file_a = tmp_path / "a.yaml"
        file_a.write_text(yaml.dump({"remotes": {"repo-a": _remote()}}))
        manager = ConfigManager(str(tmp_path))
        assert "repo-a" in manager.config["remotes"]
        assert "repo-b" not in manager.config["remotes"]

        added = tmp_path / "b.yaml"
        added.write_text(yaml.dump({"remotes": {"repo-b": _remote("https://y.com")}}))
        # Give the new file an mtime past the recorded one so the test does
        # not depend on filesystem timestamp resolution.
        bumped = manager._last_modified + 1
        os.utime(str(added), (bumped, bumped))

        manager._check_reload()

        assert "repo-a" in manager.config["remotes"]
        assert "repo-b" in manager.config["remotes"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# config_dir key (main file contains a config_dir pointer)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestConfigDirKey:
|
||||||
|
def test_merges_remotes_from_config_dir(self, tmp_path):
|
||||||
|
conf_d = tmp_path / "conf.d"
|
||||||
|
conf_d.mkdir()
|
||||||
|
(conf_d / "remotes.yaml").write_text(yaml.dump({"remotes": {"repo-extra": _remote("https://extra.com")}}))
|
||||||
|
main = tmp_path / "config.yaml"
|
||||||
|
main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {"repo-main": _remote()}}))
|
||||||
|
cfg = ConfigManager(str(main))
|
||||||
|
assert "repo-main" in cfg.config["remotes"]
|
||||||
|
assert "repo-extra" in cfg.config["remotes"]
|
||||||
|
|
||||||
|
def test_relative_config_dir_resolved_from_main_file(self, tmp_path):
|
||||||
|
conf_d = tmp_path / "conf.d"
|
||||||
|
conf_d.mkdir()
|
||||||
|
(conf_d / "r.yaml").write_text(yaml.dump({"remotes": {"repo-a": _remote()}}))
|
||||||
|
main = tmp_path / "config.yaml"
|
||||||
|
main.write_text(yaml.dump({"config_dir": "conf.d", "remotes": {}}))
|
||||||
|
cfg = ConfigManager(str(main))
|
||||||
|
assert "repo-a" in cfg.config["remotes"]
|
||||||
|
|
||||||
|
def test_config_dir_key_not_present_in_loaded_config(self, tmp_path):
|
||||||
|
conf_d = tmp_path / "conf.d"
|
||||||
|
conf_d.mkdir()
|
||||||
|
main = tmp_path / "config.yaml"
|
||||||
|
main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {}}))
|
||||||
|
cfg = ConfigManager(str(main))
|
||||||
|
assert "config_dir" not in cfg.config
|
||||||
|
|
||||||
|
def test_dir_remote_overrides_main_file_remote(self, tmp_path):
|
||||||
|
conf_d = tmp_path / "conf.d"
|
||||||
|
conf_d.mkdir()
|
||||||
|
(conf_d / "override.yaml").write_text(yaml.dump({"remotes": {"r": _remote("https://new.com")}}))
|
||||||
|
main = tmp_path / "config.yaml"
|
||||||
|
main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {"r": _remote("https://old.com")}}))
|
||||||
|
cfg = ConfigManager(str(main))
|
||||||
|
assert cfg.config["remotes"]["r"]["base_url"] == "https://new.com"
|
||||||
|
|
||||||
|
def test_empty_config_dir_uses_main_file_only(self, tmp_path):
|
||||||
|
conf_d = tmp_path / "conf.d"
|
||||||
|
conf_d.mkdir()
|
||||||
|
main = tmp_path / "config.yaml"
|
||||||
|
main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {"repo-main": _remote()}}))
|
||||||
|
cfg = ConfigManager(str(main))
|
||||||
|
assert list(cfg.config["remotes"].keys()) == ["repo-main"]
|
||||||
|
|
||||||
|
def test_reload_picks_up_changed_dir_file(self, tmp_path):
    """Rewriting a drop-in file and calling ``_check_reload`` swaps the remotes."""
    drop_in_dir = tmp_path / "conf.d"
    drop_in_dir.mkdir()
    drop_in_file = drop_in_dir / "r.yaml"
    drop_in_file.write_text(yaml.dump({"remotes": {"repo-v1": _remote()}}))

    main_file = tmp_path / "config.yaml"
    main_doc = {"config_dir": str(drop_in_dir), "remotes": {}}
    main_file.write_text(yaml.dump(main_doc))

    manager = ConfigManager(str(main_file))
    assert "repo-v1" in manager.config["remotes"]

    # Replace the drop-in content, then push the file's mtime past the
    # timestamp the manager recorded so the change is visible to the check.
    replacement_doc = {"remotes": {"repo-v2": _remote("https://v2.com")}}
    drop_in_file.write_text(yaml.dump(replacement_doc))
    bumped_mtime = manager._last_modified + 1
    os.utime(str(drop_in_file), (bumped_mtime, bumped_mtime))

    manager._check_reload()

    reloaded_remotes = manager.config["remotes"]
    assert "repo-v2" in reloaded_remotes
    assert "repo-v1" not in manager.config["remotes"]
|
||||||
@@ -0,0 +1,273 @@
|
|||||||
|
"""Tests for docker_auth: WWW-Authenticate parsing and token caching."""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from artifactapi import docker_auth
|
||||||
|
from artifactapi.docker_auth import (
|
||||||
|
_cache_key,
|
||||||
|
_get_cached_token,
|
||||||
|
_store_token,
|
||||||
|
fetch_token,
|
||||||
|
get_docker_token_for_response,
|
||||||
|
parse_www_authenticate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def clear_token_cache():
    """Empty ``docker_auth._token_cache`` on both sides of every test."""

    def _wipe():
        # Look the cache up on the module each time rather than holding a reference.
        docker_auth._token_cache.clear()

    _wipe()
    yield
    _wipe()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# parse_www_authenticate
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseWwwAuthenticate:
    """Checks on ``parse_www_authenticate`` for various challenge headers."""

    def test_full_bearer_header(self):
        """Realm, service and scope are all extracted from a complete header."""
        challenge = 'Bearer realm="https://auth.docker.io/token",service="registry.docker.io",scope="repository:library/nginx:pull"'
        parsed = parse_www_authenticate(challenge)
        assert parsed is not None
        realm, service, scope = parsed
        assert realm == "https://auth.docker.io/token"
        assert service == "registry.docker.io"
        assert scope == "repository:library/nginx:pull"

    def test_realm_only(self):
        """A header with only a realm yields empty service and scope."""
        challenge = 'Bearer realm="https://auth.example.com/token"'
        parsed = parse_www_authenticate(challenge)
        assert parsed is not None
        realm, service, scope = parsed
        assert realm == "https://auth.example.com/token"
        assert service == ""
        assert scope == ""

    def test_realm_and_service_only(self):
        """A missing scope comes back as an empty string."""
        challenge = 'Bearer realm="https://auth.example.com",service="registry.example.com"'
        parsed = parse_www_authenticate(challenge)
        assert parsed is not None
        _, service, scope = parsed
        assert service == "registry.example.com"
        assert scope == ""

    def test_invalid_scheme_returns_none(self):
        """A ``Basic`` challenge is not parsed."""
        outcome = parse_www_authenticate('Basic realm="example"')
        assert outcome is None

    def test_empty_header_returns_none(self):
        """An empty header string is not parsed."""
        outcome = parse_www_authenticate("")
        assert outcome is None

    def test_case_insensitive_bearer_parses_realm(self):
        """A lowercase ``bearer`` scheme is still accepted."""
        challenge = 'bearer realm="https://auth.example.com/token"'
        parsed = parse_www_authenticate(challenge)
        assert parsed is not None
        realm, _, _ = parsed
        assert realm == "https://auth.example.com/token"

    def test_field_order_scope_before_service_drops_service(self):
        """Documents the result when scope precedes service in the header."""
        # The regex requires realm,service,scope order; scope before service
        # results in service being silently dropped. This test documents the known limitation.
        challenge = 'Bearer realm="https://auth.example.com",scope="repo:pull",service="svc"'
        parsed = parse_www_authenticate(challenge)
        assert parsed is not None
        realm, service, scope = parsed
        assert realm == "https://auth.example.com"
        assert scope == "repo:pull"
        assert service == ""  # silently dropped when out of order
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _cache_key
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCacheKey:
    """Checks on how ``_cache_key`` combines realm, service, scope and username."""

    def test_key_contains_all_components(self):
        """Every input component appears somewhere in the key."""
        key = _cache_key("https://realm.com", "svc", "scope", "user")
        for component in ("https://realm.com", "svc", "scope", "user"):
            assert component in key

    def test_none_username_uses_empty_string(self):
        """With no username the key ends in the bare separator."""
        key = _cache_key("https://realm.com", "svc", "scope", None)
        assert key.endswith("|")

    def test_different_services_give_different_keys(self):
        """Changing only the service changes the key."""
        first = _cache_key("realm", "svc1", "scope", None)
        second = _cache_key("realm", "svc2", "scope", None)
        assert first != second

    def test_different_scopes_give_different_keys(self):
        """Changing only the scope changes the key."""
        first = _cache_key("realm", "svc", "scope:read", None)
        second = _cache_key("realm", "svc", "scope:write", None)
        assert first != second

    def test_pipe_in_field_value_can_collide_with_adjacent_fields(self):
        """Documents that an embedded pipe can make two distinct inputs collide."""
        # The "|" separator is not escaped, so a pipe embedded in one field
        # produces the same key as the same pipe appearing as a separator boundary.
        # This is a known limitation: _cache_key("a|b","c","d",None) ==
        # _cache_key("a","b|c","d",None). Documents the behaviour, not a claim it's correct.
        pipe_in_realm = _cache_key("a|b", "c", "d", None)
        pipe_in_service = _cache_key("a", "b|c", "d", None)
        assert pipe_in_realm == pipe_in_service
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _get_cached_token / _store_token
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestTokenCaching:
    """Checks on ``_get_cached_token`` and ``_store_token``."""

    def test_get_returns_none_when_not_cached(self):
        """An unknown key yields ``None``."""
        looked_up = _get_cached_token("no-such-key")
        assert looked_up is None

    def test_get_returns_token_when_valid(self):
        """A freshly stored token is returned."""
        _store_token("mykey", "tok-abc", 300)
        looked_up = _get_cached_token("mykey")
        assert looked_up == "tok-abc"

    def test_get_returns_none_when_expired(self):
        """An entry whose expiry lies in the past is not returned."""
        already_expired = time.time() - 1
        docker_auth._token_cache["mykey"] = ("old-token", already_expired)
        looked_up = _get_cached_token("mykey")
        assert looked_up is None

    def test_expired_entry_is_removed_from_cache(self):
        """Looking up an expired entry deletes it from the cache."""
        already_expired = time.time() - 1
        docker_auth._token_cache["mykey"] = ("old-token", already_expired)
        _get_cached_token("mykey")
        assert "mykey" not in docker_auth._token_cache

    def test_store_expires_30s_before_stated_time(self):
        """The stored expiry is about 30 s earlier than ``expires_in`` implies."""
        started = time.time()
        _store_token("mykey", "tok", 100)
        _, stored_expiry = docker_auth._token_cache["mykey"]
        # expires_in - 30 = 70; allow ±2 s clock wiggle
        earliest = started + 68
        latest = started + 72
        assert earliest <= stored_expiry <= latest

    def test_store_enforces_minimum_10s_expiry(self):
        """A very short ``expires_in`` still yields at least 10 s of lifetime."""
        started = time.time()
        _store_token("mykey", "tok", 5)  # expires_in - 30 would be negative
        _, stored_expiry = docker_auth._token_cache["mykey"]
        assert stored_expiry >= started + 10
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# fetch_token (async, mocks httpx)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _make_mock_http_client(token_payload: dict):
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.raise_for_status = MagicMock()
|
||||||
|
mock_response.json.return_value = token_payload
|
||||||
|
|
||||||
|
mock_client = AsyncMock()
|
||||||
|
mock_client.get = AsyncMock(return_value=mock_response)
|
||||||
|
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__aenter__ = AsyncMock(return_value=mock_client)
|
||||||
|
ctx.__aexit__ = AsyncMock(return_value=False)
|
||||||
|
return ctx, mock_client
|
||||||
|
|
||||||
|
|
||||||
|
class TestFetchToken:
    """Checks on ``fetch_token`` with ``httpx.AsyncClient`` patched out."""

    async def test_returns_token_field(self):
        """The ``token`` field of the response is returned."""
        payload = {"token": "bearer-tok", "expires_in": 300}
        client_cm, _ = _make_mock_http_client(payload)
        with patch("httpx.AsyncClient", return_value=client_cm):
            result = await fetch_token("https://auth.example.com", "svc", "scope")
        assert result == "bearer-tok"

    async def test_falls_back_to_access_token_field(self):
        """``access_token`` is used when ``token`` is absent."""
        payload = {"access_token": "access-tok", "expires_in": 300}
        client_cm, _ = _make_mock_http_client(payload)
        with patch("httpx.AsyncClient", return_value=client_cm):
            result = await fetch_token("https://auth.example.com", "svc", "scope")
        assert result == "access-tok"

    async def test_returns_none_when_response_missing_token_field(self):
        """A response carrying neither token field yields ``None``."""
        payload = {"not_token": "value", "expires_in": 300}
        client_cm, _ = _make_mock_http_client(payload)
        with patch("httpx.AsyncClient", return_value=client_cm):
            result = await fetch_token("https://auth.example.com", "svc", "scope")
        assert result is None

    async def test_defaults_expires_in_to_300_when_missing(self):
        """Without ``expires_in`` the cached expiry matches a 300 s default."""
        client_cm, _ = _make_mock_http_client({"token": "tok"})  # no expires_in key
        started = time.time()
        with patch("httpx.AsyncClient", return_value=client_cm):
            result = await fetch_token("https://auth.example.com", "svc", "scope")
        assert result == "tok"
        cache_key = _cache_key("https://auth.example.com", "svc", "scope", None)
        _, stored_expiry = docker_auth._token_cache[cache_key]
        # Default expires_in=300, stored as time.time() + max(300-30, 10) = 270
        earliest = started + 268
        latest = started + 272
        assert earliest <= stored_expiry <= latest

    async def test_uses_cache_on_second_call_without_http(self):
        """A repeated call is served from the cache with no further GET."""
        payload = {"token": "cached-tok", "expires_in": 300}
        client_cm, http_client = _make_mock_http_client(payload)
        with patch("httpx.AsyncClient", return_value=client_cm):
            await fetch_token("https://auth.example.com", "svc", "scope")
            # Forget the first request so only the second call is observed.
            http_client.get.reset_mock()
            result = await fetch_token("https://auth.example.com", "svc", "scope")
        http_client.get.assert_not_called()
        assert result == "cached-tok"

    async def test_returns_none_on_network_error(self):
        """An exception raised by the GET is turned into ``None``."""
        failing_client = AsyncMock()
        failing_client.get = AsyncMock(side_effect=Exception("connection refused"))
        client_cm = MagicMock()
        client_cm.__aenter__ = AsyncMock(return_value=failing_client)
        client_cm.__aexit__ = AsyncMock(return_value=False)
        with patch("httpx.AsyncClient", return_value=client_cm):
            result = await fetch_token("https://auth.example.com", "svc", "scope")
        assert result is None

    async def test_returns_none_on_http_status_error(self):
        """An ``HTTPStatusError`` from ``raise_for_status`` is turned into ``None``."""
        rejected_response = MagicMock()
        rejected_response.raise_for_status.side_effect = httpx.HTTPStatusError(
            "401 Unauthorized", request=MagicMock(), response=MagicMock()
        )
        http_client = AsyncMock()
        http_client.get = AsyncMock(return_value=rejected_response)
        client_cm = MagicMock()
        client_cm.__aenter__ = AsyncMock(return_value=http_client)
        client_cm.__aexit__ = AsyncMock(return_value=False)
        with patch("httpx.AsyncClient", return_value=client_cm):
            result = await fetch_token("https://auth.example.com", "svc", "scope")
        assert result is None

    async def test_passes_credentials_as_auth_tuple(self):
        """Username and password reach the GET as an ``auth`` tuple."""
        payload = {"token": "authed-tok", "expires_in": 300}
        client_cm, http_client = _make_mock_http_client(payload)
        with patch("httpx.AsyncClient", return_value=client_cm):
            await fetch_token("https://auth.example.com", "svc", "scope", "user", "pass")
        get_kwargs = http_client.get.call_args.kwargs
        assert get_kwargs.get("auth") == ("user", "pass")

    async def test_no_auth_when_no_credentials(self):
        """Without credentials the GET carries no ``auth`` value."""
        payload = {"token": "anon-tok", "expires_in": 300}
        client_cm, http_client = _make_mock_http_client(payload)
        with patch("httpx.AsyncClient", return_value=client_cm):
            await fetch_token("https://auth.example.com", "svc", "scope")
        get_kwargs = http_client.get.call_args.kwargs
        assert get_kwargs.get("auth") is None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_docker_token_for_response
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetDockerTokenForResponse:
    """Checks on ``get_docker_token_for_response``."""

    async def test_returns_none_for_non_bearer_header(self):
        """A ``Basic`` challenge produces no token."""
        result = await get_docker_token_for_response('Basic realm="example"')
        assert result is None

    async def test_end_to_end_parse_and_fetch(self):
        """parse_www_authenticate → fetch_token wired together end-to-end."""
        challenge = 'Bearer realm="https://auth.example.com",service="svc",scope="repo:pull"'
        payload = {"token": "e2e-tok", "expires_in": 300}
        client_cm, http_client = _make_mock_http_client(payload)
        with patch("httpx.AsyncClient", return_value=client_cm):
            result = await get_docker_token_for_response(challenge, "user", "pass")
        assert result == "e2e-tok"

        # The parsed header fields and the credentials must reach the GET call.
        get_kwargs = http_client.get.call_args.kwargs
        query = get_kwargs["params"]
        assert query["service"] == "svc"
        assert query["scope"] == "repo:pull"
        assert get_kwargs["auth"] == ("user", "pass")
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,132 @@
|
|||||||
|
"""Tests for S3Storage: get_object_key (pure logic) and I/O methods."""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from botocore.exceptions import ClientError
|
||||||
|
from fastapi import HTTPException
|
||||||
|
|
||||||
|
from artifactapi.storage import S3Storage
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def storage():
    """Provide an ``S3Storage`` built while ``boto3.client`` is patched.

    The instance's ``client`` attribute is then replaced with a fresh
    ``MagicMock`` so tests can script its behaviour.
    """
    init_kwargs = {
        "endpoint": "localhost:9000",
        "access_key": "testkey",
        "secret_key": "testsecret",
        "bucket": "testbucket",
        "secure": False,
    }
    with patch("boto3.client", return_value=MagicMock()):
        instance = S3Storage(**init_kwargs)
    instance.client = MagicMock()
    return instance
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_object_key
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetObjectKey:
    """Checks on the keys produced by ``S3Storage.get_object_key``."""

    def test_key_has_three_part_structure(self, storage):
        """A nested path maps to ``remote/hash-segment/filename``."""
        # remote / hash-segment / filename
        key = storage.get_object_key("myremote", "some/path/to/file.rpm")
        segments = key.split("/")
        assert len(segments) == 3
        remote_part, hash_part, file_part = segments
        assert remote_part == "myremote"
        assert file_part == "file.rpm"
        assert len(hash_part) == 16  # SHA-256 hex truncated to 16 chars

    def test_key_uses_sha256_of_directory_path(self, storage):
        """The middle segment is the first 16 hex chars of SHA-256 of the directory."""
        # Pin the hash algorithm, truncation length, and format in one assertion
        key = storage.get_object_key("myremote", "some/path/to/file.rpm")
        directory_digest = hashlib.sha256(b"some/path/to").hexdigest()
        expected_segment = directory_digest[:16]
        assert key == f"myremote/{expected_segment}/file.rpm"

    def test_different_remotes_give_different_keys(self, storage):
        """The same path under two remotes yields two keys."""
        key_a = storage.get_object_key("remote-a", "path/to/file.rpm")
        key_b = storage.get_object_key("remote-b", "path/to/file.rpm")
        assert key_a != key_b

    def test_different_directories_give_different_keys(self, storage):
        """Same filename in different directories: keys differ, filename is kept."""
        key_v1 = storage.get_object_key("myremote", "path/version-1/file.rpm")
        key_v2 = storage.get_object_key("myremote", "path/version-2/file.rpm")
        assert key_v1 != key_v2
        name_v1 = key_v1.split("/")[-1]
        name_v2 = key_v2.split("/")[-1]
        assert name_v1 == name_v2 == "file.rpm"

    def test_leading_slash_stripped(self, storage):
        """A leading slash on the path does not change the key."""
        with_slash = storage.get_object_key("myremote", "/path/to/file.rpm")
        without_slash = storage.get_object_key("myremote", "path/to/file.rpm")
        assert with_slash == without_slash

    def test_file_with_no_directory(self, storage):
        """A bare filename gets no hash segment."""
        key = storage.get_object_key("myremote", "file.rpm")
        assert key == "myremote/file.rpm"

    def test_docker_blob_uses_digest_path(self, storage):
        """A Docker blob path maps to ``remote/blobs/sha256/<digest>``."""
        digest = "a" * 64  # realistic 64-char SHA-256 hex string
        blob_path = f"library/nginx/blobs/sha256:{digest}"
        key = storage.get_object_key("dockerhub", blob_path)
        assert key == f"dockerhub/blobs/sha256/{digest}"

    def test_docker_blob_deduplication_across_images(self, storage):
        """Same blob digest pulled from different images maps to the same S3 key."""
        digest = "deadbeef" * 8  # 64-char hex
        nginx_key = storage.get_object_key("dockerhub", f"library/nginx/blobs/sha256:{digest}")
        ubuntu_key = storage.get_object_key("dockerhub", f"library/ubuntu/blobs/sha256:{digest}")
        assert nginx_key == ubuntu_key

    def test_docker_blob_different_digests_different_keys(self, storage):
        """Two different digests under the same image yield two keys."""
        blob_prefix = "library/nginx/blobs/sha256:"
        key_a = storage.get_object_key("dockerhub", blob_prefix + "a" * 64)
        key_b = storage.get_object_key("dockerhub", blob_prefix + "b" * 64)
        assert key_a != key_b

    def test_docker_blob_different_remotes_different_keys(self, storage):
        """The same blob under two remotes yields two keys."""
        digest = "abc" * 21 + "d"  # 64-char hex
        blob_path = f"library/nginx/blobs/sha256:{digest}"
        key_a = storage.get_object_key("remote-a", blob_path)
        key_b = storage.get_object_key("remote-b", blob_path)
        assert key_a != key_b
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_url
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetUrl:
    """Checks on the URLs produced by ``S3Storage.get_url``."""

    def test_returns_http_url_for_insecure_endpoint(self, storage):
        """The URL is ``http://<endpoint>/<bucket>/<key>`` for the shared fixture."""
        url = storage.get_url("myremote/abc123/file.rpm")
        expected = "http://localhost:9000/testbucket/myremote/abc123/file.rpm"
        assert url == expected

    def test_returns_http_url_for_secure_storage(self):
        """Even with ``secure=True`` the returned URL uses the ``http`` scheme."""
        with patch("boto3.client", return_value=MagicMock()):
            secure_storage = S3Storage(
                endpoint="s3.example.com",
                access_key="k",
                secret_key="s",
                bucket="b",
                secure=True,
            )
        secure_storage.client = MagicMock()
        # get_url uses http:// always (direct internal access address, not the S3 protocol)
        url = secure_storage.get_url("path/to/file.rpm")
        assert url == "http://s3.example.com/b/path/to/file.rpm"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# upload / download_object
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestUpload:
    """Checks on ``S3Storage.upload``."""

    def test_upload_returns_s3_uri(self, storage):
        """``upload`` returns an ``s3://<bucket>/<key>`` URI."""
        storage.client.put_object.return_value = {}
        object_key = "myremote/abc123/file.rpm"
        uri = storage.upload(object_key, b"content")
        assert uri == "s3://testbucket/myremote/abc123/file.rpm"
|
||||||
|
|
||||||
|
|
||||||
|
class TestDownloadObject:
    """Checks on ``S3Storage.download_object``."""

    def test_download_object_raises_404_on_client_error(self, storage):
        """A ``NoSuchKey`` ``ClientError`` from the client becomes an HTTP 404."""
        error_response = {
            "Error": {"Code": "NoSuchKey", "Message": "The specified key does not exist"}
        }
        storage.client.get_object.side_effect = ClientError(error_response, "GetObject")

        with pytest.raises(HTTPException) as raised:
            storage.download_object("nonexistent/key")

        # Checked outside the ``with`` so it runs after the exception is captured.
        assert raised.value.status_code == 404
|
||||||
Reference in New Issue
Block a user