Initial implementation of generic artifact storage system
- FastAPI-based caching proxy for remote file servers
- YAML configuration for multiple remotes (GitHub, Gitea, HashiCorp, etc.)
- Direct URL API: /api/v1/remote/{remote}/{path} with auto-download and caching
- Pattern-based access control with regex filtering
- S3/MinIO backend storage with predictable paths
- Docker Compose setup with MinIO for local development
commit 46711eec6a

.gitignore (vendored, new file)
@@ -0,0 +1,47 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual environment
.venv/
venv/
ENV/
env/

# IDE
.vscode/
.idea/
*.swp
*.swo

# Environment variables
.env
remotes.yaml

# Logs
*.log

# uv
uv.lock

# Docker volumes
minio_data/
Dockerfile (new file)
@@ -0,0 +1,49 @@
# Use Alpine Linux as base image
FROM python:3.11-alpine

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apk add --no-cache \
    gcc \
    musl-dev \
    libffi-dev \
    postgresql-dev \
    curl \
    wget \
    tar

# Install uv
ARG PACKAGE_VERSION=0.9.21
RUN wget -O /app/uv-x86_64-unknown-linux-musl.tar.gz https://github.com/astral-sh/uv/releases/download/${PACKAGE_VERSION}/uv-x86_64-unknown-linux-musl.tar.gz && \
    tar xf /app/uv-x86_64-unknown-linux-musl.tar.gz -C /app && \
    mv /app/uv-x86_64-unknown-linux-musl/uv /usr/local/bin/uv && \
    rm -rf /app/uv-x86_64-unknown-linux-musl* && \
    chmod +x /usr/local/bin/uv && \
    uv --version

# Create non-root user first
RUN adduser -D -s /bin/sh appuser && \
    chown -R appuser:appuser /app

# Copy dependency files and change ownership
COPY --chown=appuser:appuser pyproject.toml uv.lock README.md ./

# Switch to appuser and install Python dependencies
USER appuser
RUN uv sync --frozen

# Copy application source
COPY --chown=appuser:appuser src/ ./src/
COPY --chown=appuser:appuser remotes.yaml ./

# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Run the application
CMD ["uv", "run", "python", "-m", "src.artifactapi.main"]
Makefile (new file)
@@ -0,0 +1,47 @@
.PHONY: build install dev clean test lint format docker-build docker-up docker-down docker-logs docker-rebuild docker-clean docker-restart

build:
	docker build --no-cache -t artifactapi:latest .

install: build

docker-build: build

dev: build
	uv sync --dev

clean:
	rm -rf .venv
	rm -rf build/
	rm -rf dist/
	rm -rf *.egg-info/

test:
	uv run pytest

lint:
	uv run ruff check --fix .

format:
	uv run ruff format .

run:
	uv run python -m src.artifactapi.main

docker-up:
	docker-compose up --build --force-recreate -d

docker-down:
	docker-compose down

docker-logs:
	docker-compose logs -f

docker-rebuild:
	docker-compose build --no-cache

docker-clean:
	docker-compose down -v --remove-orphans
	docker system prune -f

docker-restart: docker-down docker-up
README.md (new file)
@@ -0,0 +1,664 @@
# Artifact Storage System

A generic FastAPI-based artifact caching system that downloads and stores files from remote sources (GitHub, Gitea, HashiCorp, etc.) in S3-compatible storage with configuration-based access control.

## Features

- **Generic Remote Support**: Works with any HTTP-based file server (GitHub, Gitea, HashiCorp, custom servers)
- **Configuration-Based**: YAML configuration for remotes, patterns, and access control
- **Direct URL API**: Access cached files via clean URLs like `/api/github/owner/repo/path/file.tar.gz`
- **Pattern Filtering**: Regex-based inclusion patterns for security and organization
- **Smart Caching**: Automatic download and cache on first access, serve from cache afterward
- **S3 Storage**: MinIO/S3 backend with predictable paths
- **Content-Type Detection**: Automatic MIME type detection for downloads
## Architecture

The system acts as a caching proxy that:
1. Receives requests via the `/api/{remote}/{path}` endpoint
2. Checks if the file is already cached
3. If not cached, downloads from the configured remote and caches it
4. Serves the file with appropriate headers and content types
5. Enforces access control via configurable regex patterns
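Reduced to a minimal sketch (hypothetical helper names, with an in-memory dict standing in for S3; the real implementation in `src/artifactapi/main.py` uses S3 plus Redis):

```python
import re
import httpx

# In-memory stand-ins for the S3 bucket and the configured remotes.
CACHE: dict[str, bytes] = {}
REMOTES = {
    "github": {"base_url": "https://github.com", "include_patterns": [r".*\.tar\.gz$"]},
}

def fetch_through_proxy(remote: str, path: str) -> bytes:
    cfg = REMOTES[remote]  # resolve the configured remote
    # Access control: at least one include pattern must match the requested path.
    if not any(re.search(p, path) for p in cfg["include_patterns"]):
        raise PermissionError("403: path not allowed by include patterns")
    key = f"{remote}/{path}"
    if key in CACHE:  # cache hit: serve stored bytes
        return CACHE[key]
    # Cache miss: download from the remote, store, then serve.
    resp = httpx.get(f"{cfg['base_url']}/{path}", follow_redirects=True)
    resp.raise_for_status()
    CACHE[key] = resp.content
    return CACHE[key]
```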
## Quick Start

1. Start the backing services (MinIO, PostgreSQL, Redis):
```bash
docker-compose up -d
```

2. Create a virtual environment and install dependencies:
```bash
uv venv
source .venv/bin/activate
uv sync
```

3. Start the API:
```bash
uv run python -m src.artifactapi.main
```

4. Access artifacts directly via URL:
```bash
# This will download and cache the file on first access
xh GET localhost:8000/api/github/gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64.tar.gz

# Subsequent requests serve from cache (see the X-Artifact-Source: cache header)
curl -I localhost:8000/api/github/gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64.tar.gz
```
## API Endpoints

### Direct Access
- `GET /api/{remote}/{path}` - Direct access to artifacts with auto-caching

### Management
- `GET /` - API info and available remotes
- `GET /health` - Health check
- `GET /config` - View current configuration
- `POST /cache-artifact` - Batch cache artifacts matching pattern
- `GET /artifacts/{remote}` - List cached artifacts
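A small client against these endpoints might look like the following sketch (it assumes the service is reachable on `localhost:8000`; the `X-Artifact-Source` header is set by the direct-access route):

```python
import httpx

BASE = "http://localhost:8000"
ARTIFACT = "api/github/gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64.tar.gz"

with httpx.Client(base_url=BASE, timeout=60.0) as client:
    print(client.get("/health").json())        # {"status": "healthy"}
    print(client.get("/").json()["remotes"])   # names of the configured remotes

    # First request downloads and caches; repeat requests report "cache".
    resp = client.get(f"/{ARTIFACT}")
    resp.raise_for_status()
    print(resp.headers.get("X-Artifact-Source"), len(resp.content))
```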
## Configuration

The system uses `remotes.yaml` to define remote repositories and access patterns. All other configuration is provided via environment variables.

### remotes.yaml Structure

```yaml
remotes:
  remote-name:
    base_url: "https://example.com"   # Base URL for the remote
    type: "remote"                    # Type: "remote" or "local"
    package: "generic"                # Package type: "generic", "alpine", "rpm"
    description: "Human readable description"
    include_patterns:                 # Regex patterns for allowed files
      - "pattern1"
      - "pattern2"
    cache:                            # Cache configuration (optional)
      file_ttl: 0                     # File cache TTL (0 = indefinite)
      index_ttl: 300                  # Index file TTL in seconds
```
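For reference, the service reads this file with `yaml.safe_load` (see `src/artifactapi/config.py`); a minimal check that a config parses and lists its remotes could look like this:

```python
import yaml

with open("remotes.yaml") as f:
    cfg = yaml.safe_load(f) or {}

# Print each configured remote with its type and base URL (local remotes have none).
for name, remote in cfg.get("remotes", {}).items():
    print(name, remote.get("type", "remote"), remote.get("base_url", "(local)"))
```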
### Remote Types

#### Generic Remotes
For general file hosting (GitHub releases, custom servers):

```yaml
remotes:
  github:
    base_url: "https://github.com"
    type: "remote"
    package: "generic"
    description: "GitHub releases and files"
    include_patterns:
      - "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
      - "lxc/incus/.*\\.tar\\.gz$"
      - "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
    cache:
      file_ttl: 0    # Cache files indefinitely
      index_ttl: 0   # No index files for generic remotes

  hashicorp-releases:
    base_url: "https://releases.hashicorp.com"
    type: "remote"
    package: "generic"
    description: "HashiCorp product releases"
    include_patterns:
      - "terraform/.*terraform_.*_linux_amd64\\.zip$"
      - "vault/.*vault_.*_linux_amd64\\.zip$"
      - "consul/.*/consul_.*_linux_amd64\\.zip$"
    cache:
      file_ttl: 0
      index_ttl: 0
```

#### Package Repository Remotes
For Linux package repositories with index files:

```yaml
remotes:
  alpine:
    base_url: "https://dl-cdn.alpinelinux.org"
    type: "remote"
    package: "alpine"
    description: "Alpine Linux APK package repository"
    include_patterns:
      - ".*/x86_64/.*\\.apk$"   # Only x86_64 packages
    cache:
      file_ttl: 0       # Cache packages indefinitely
      index_ttl: 7200   # Cache APKINDEX.tar.gz for 2 hours

  almalinux:
    base_url: "http://mirror.aarnet.edu.au/pub/almalinux"
    type: "remote"
    package: "rpm"
    description: "AlmaLinux RPM package repository"
    include_patterns:
      - ".*/x86_64/.*\\.rpm$"
      - ".*/noarch/.*\\.rpm$"
    cache:
      file_ttl: 0
      index_ttl: 7200   # Cache metadata files for 2 hours
```

#### Local Repositories
For storing custom artifacts:

```yaml
remotes:
  local-generic:
    type: "local"
    package: "generic"
    description: "Local generic file repository"
    cache:
      file_ttl: 0
      index_ttl: 0
```
### Include Patterns

Include patterns are regular expressions that control which files can be accessed:

```yaml
include_patterns:
  # Specific project patterns
  - "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"

  # File extension patterns
  - ".*\\.tar\\.gz$"
  - ".*\\.zip$"
  - ".*\\.rpm$"

  # Architecture-specific patterns
  - ".*/x86_64/.*"
  - ".*/linux-amd64/.*"

  # Version-specific patterns
  - "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
```

**Security Note**: Only files matching at least one include pattern are accessible. Files not matching any pattern return HTTP 403.
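Pattern checks are plain `re.search` calls against the requested path, mirroring `check_artifact_patterns` in `src/artifactapi/main.py`; a reduced version:

```python
import re

INCLUDE_PATTERNS = [
    r"gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*",
    r".*\.zip$",
]

def is_allowed(path: str) -> bool:
    # An empty pattern list allows everything; otherwise at least one pattern must match.
    if not INCLUDE_PATTERNS:
        return True
    return any(re.search(p, path) for p in INCLUDE_PATTERNS)

print(is_allowed("gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64.tar.gz"))  # True
print(is_allowed("some/other/file.deb"))  # False -> request is rejected with HTTP 403
```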
### Cache Configuration

Control how long different file types are cached:

```yaml
cache:
  file_ttl: 0      # Regular files (0 = cache indefinitely)
  index_ttl: 300   # Index files like APKINDEX.tar.gz (seconds)
```

**Index Files**: Repository metadata files that change frequently:
- Alpine: `APKINDEX.tar.gz`
- RPM: `repomd.xml`, `*-primary.xml.gz`, etc.
- These are automatically detected and use `index_ttl`
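Index detection is extension-based (see `RedisCache.is_index_file` in `src/artifactapi/cache.py`); a file classified as an index gets `index_ttl`, everything else gets `file_ttl`:

```python
def is_index_file(path: str) -> bool:
    # Matches the metadata files listed above.
    return (
        path.endswith("APKINDEX.tar.gz")
        or path.endswith("Packages.gz")
        or path.endswith("repomd.xml")
        or ("repodata/" in path and path.endswith((".xml", ".xml.gz", ".xml.bz2", ".xml.xz")))
    )

def ttl_for(path: str, cache_cfg: dict) -> int:
    # 0 means "cache indefinitely" for regular files.
    return cache_cfg.get("index_ttl", 300) if is_index_file(path) else cache_cfg.get("file_ttl", 0)

print(ttl_for("alpine/v3.20/main/x86_64/APKINDEX.tar.gz", {"file_ttl": 0, "index_ttl": 7200}))  # 7200
```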
### Environment Variables

All runtime configuration comes from environment variables:

**Database Configuration:**
- `DBHOST` - PostgreSQL host
- `DBPORT` - PostgreSQL port
- `DBUSER` - PostgreSQL username
- `DBPASS` - PostgreSQL password
- `DBNAME` - PostgreSQL database name

**Redis Configuration:**
- `REDIS_URL` - Redis connection URL (e.g., `redis://localhost:6379`)

**S3/MinIO Configuration:**
- `MINIO_ENDPOINT` - MinIO/S3 endpoint
- `MINIO_ACCESS_KEY` - S3 access key
- `MINIO_SECRET_KEY` - S3 secret key
- `MINIO_BUCKET` - S3 bucket name
- `MINIO_SECURE` - Use HTTPS (`true`/`false`)
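The service fails fast when any of these are missing (see `ConfigManager.get_s3_config` and the related getters in `src/artifactapi/config.py`); a quick pre-flight check along the same lines:

```python
import os

REQUIRED = [
    "MINIO_ENDPOINT", "MINIO_ACCESS_KEY", "MINIO_SECRET_KEY", "MINIO_BUCKET",
    "REDIS_URL", "DBHOST", "DBPORT", "DBUSER", "DBPASS", "DBNAME",
]

missing = [name for name in REQUIRED if not os.getenv(name)]
if missing:
    raise SystemExit(f"Missing required environment variables: {', '.join(missing)}")
```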
## Usage Examples

### Direct File Access
```bash
# Access GitHub releases
curl localhost:8000/api/github/gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64.tar.gz

# Access HashiCorp releases (when configured)
curl localhost:8000/api/hashicorp/terraform/1.6.0/terraform_1.6.0_linux_amd64.zip

# Access custom remotes
curl localhost:8000/api/custom/path/to/file.tar.gz
```

### Response Headers
- `X-Artifact-Source: cache|remote` - Indicates if served from cache or freshly downloaded
- `Content-Type` - Automatically detected (application/gzip, application/zip, etc.)
- `Content-Disposition` - Download filename
- `Content-Length` - File size

### Pattern Enforcement
Access is controlled by regex patterns in the configuration. Requests for files not matching any pattern return HTTP 403.
## Storage Path Format

Files are stored with keys like:
- `{remote_name}/{path_hash}/{filename}` for direct API access
- `{hostname}/{url_hash}/{filename}` for legacy batch operations

Example: `github/a1b2c3d4e5f6g7h8/terragrunt_linux_amd64.tar.gz`
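The exact key derivation lives in `src/artifactapi/storage.py` (not shown above); assuming a truncated SHA-256 over the request path, a key in this format can be sketched as:

```python
import hashlib

def object_key(remote_name: str, path: str) -> str:
    # Assumption: a 16-character SHA-256 prefix, matching the example key above.
    path_hash = hashlib.sha256(path.encode()).hexdigest()[:16]
    filename = path.rsplit("/", 1)[-1]
    return f"{remote_name}/{path_hash}/{filename}"

print(object_key("github", "gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64.tar.gz"))
```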
## Kubernetes Deployment

Deploy the artifact storage system to Kubernetes using the following manifests:

### 1. Namespace
```yaml
apiVersion: v1
kind: Namespace
metadata:
  name: artifact-storage
```
### 2. ConfigMap for remotes.yaml
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: artifactapi-config
  namespace: artifact-storage
data:
  remotes.yaml: |
    remotes:
      github:
        base_url: "https://github.com"
        type: "remote"
        package: "generic"
        description: "GitHub releases and files"
        include_patterns:
          - "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
          - "lxc/incus/.*\\.tar\\.gz$"
          - "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
        cache:
          file_ttl: 0
          index_ttl: 0

      hashicorp-releases:
        base_url: "https://releases.hashicorp.com"
        type: "remote"
        package: "generic"
        description: "HashiCorp product releases"
        include_patterns:
          - "terraform/.*terraform_.*_linux_amd64\\.zip$"
          - "vault/.*vault_.*_linux_amd64\\.zip$"
          - "consul/.*/consul_.*_linux_amd64\\.zip$"
        cache:
          file_ttl: 0
          index_ttl: 0
```
### 3. Secret for Environment Variables
```yaml
apiVersion: v1
kind: Secret
metadata:
  name: artifactapi-secret
  namespace: artifact-storage
type: Opaque
stringData:
  DBHOST: "postgres-service"
  DBPORT: "5432"
  DBUSER: "artifacts"
  DBPASS: "artifacts123"
  DBNAME: "artifacts"
  REDIS_URL: "redis://redis-service:6379"
  MINIO_ENDPOINT: "minio-service:9000"
  MINIO_ACCESS_KEY: "minioadmin"
  MINIO_SECRET_KEY: "minioadmin"
  MINIO_BUCKET: "artifacts"
  MINIO_SECURE: "false"
```
### 4. PostgreSQL Deployment
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: postgres
  namespace: artifact-storage
spec:
  replicas: 1
  selector:
    matchLabels:
      app: postgres
  template:
    metadata:
      labels:
        app: postgres
    spec:
      containers:
        - name: postgres
          image: postgres:15-alpine
          env:
            - name: POSTGRES_DB
              value: artifacts
            - name: POSTGRES_USER
              value: artifacts
            - name: POSTGRES_PASSWORD
              value: artifacts123
          ports:
            - containerPort: 5432
          volumeMounts:
            - name: postgres-storage
              mountPath: /var/lib/postgresql/data
          livenessProbe:
            exec:
              command: ["pg_isready", "-U", "artifacts", "-d", "artifacts"]
            initialDelaySeconds: 30
            periodSeconds: 30
      volumes:
        - name: postgres-storage
          persistentVolumeClaim:
            claimName: postgres-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: postgres-service
  namespace: artifact-storage
spec:
  selector:
    app: postgres
  ports:
    - port: 5432
      targetPort: 5432
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: postgres-pvc
  namespace: artifact-storage
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
```
### 5. Redis Deployment
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  namespace: artifact-storage
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:7-alpine
          command: ["redis-server", "--save", "20", "1"]
          ports:
            - containerPort: 6379
          volumeMounts:
            - name: redis-storage
              mountPath: /data
          livenessProbe:
            exec:
              command: ["redis-cli", "ping"]
            initialDelaySeconds: 30
            periodSeconds: 30
      volumes:
        - name: redis-storage
          persistentVolumeClaim:
            claimName: redis-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: redis-service
  namespace: artifact-storage
spec:
  selector:
    app: redis
  ports:
    - port: 6379
      targetPort: 6379
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: redis-pvc
  namespace: artifact-storage
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
```
### 6. MinIO Deployment
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: minio
  namespace: artifact-storage
spec:
  replicas: 1
  selector:
    matchLabels:
      app: minio
  template:
    metadata:
      labels:
        app: minio
    spec:
      containers:
        - name: minio
          image: minio/minio:latest
          command: ["minio", "server", "/data", "--console-address", ":9001"]
          env:
            - name: MINIO_ROOT_USER
              value: minioadmin
            - name: MINIO_ROOT_PASSWORD
              value: minioadmin
          ports:
            - containerPort: 9000
            - containerPort: 9001
          volumeMounts:
            - name: minio-storage
              mountPath: /data
          livenessProbe:
            httpGet:
              path: /minio/health/live
              port: 9000
            initialDelaySeconds: 30
            periodSeconds: 30
      volumes:
        - name: minio-storage
          persistentVolumeClaim:
            claimName: minio-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: minio-service
  namespace: artifact-storage
spec:
  selector:
    app: minio
  ports:
    - name: api
      port: 9000
      targetPort: 9000
    - name: console
      port: 9001
      targetPort: 9001
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: minio-pvc
  namespace: artifact-storage
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
```
### 7. Artifact API Deployment
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: artifactapi
  namespace: artifact-storage
spec:
  replicas: 2
  selector:
    matchLabels:
      app: artifactapi
  template:
    metadata:
      labels:
        app: artifactapi
    spec:
      containers:
        - name: artifactapi
          image: artifactapi:latest
          ports:
            - containerPort: 8000
          envFrom:
            - secretRef:
                name: artifactapi-secret
          volumeMounts:
            - name: config-volume
              mountPath: /app/remotes.yaml
              subPath: remotes.yaml
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 10
            periodSeconds: 5
          resources:
            requests:
              memory: "256Mi"
              cpu: "250m"
            limits:
              memory: "512Mi"
              cpu: "500m"
      volumes:
        - name: config-volume
          configMap:
            name: artifactapi-config
---
apiVersion: v1
kind: Service
metadata:
  name: artifactapi-service
  namespace: artifact-storage
spec:
  selector:
    app: artifactapi
  ports:
    - port: 8000
      targetPort: 8000
  type: ClusterIP
```
### 8. Ingress (Optional)
```yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: artifactapi-ingress
  namespace: artifact-storage
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
    nginx.ingress.kubernetes.io/proxy-body-size: "10g"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
spec:
  rules:
    - host: artifacts.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: artifactapi-service
                port:
                  number: 8000
```
### Deployment Commands
```bash
# Create namespace
kubectl apply -f namespace.yaml

# Deploy PostgreSQL, Redis, and MinIO
kubectl apply -f postgres.yaml
kubectl apply -f redis.yaml
kubectl apply -f minio.yaml

# Wait for databases to be ready
kubectl wait --for=condition=ready pod -l app=postgres -n artifact-storage --timeout=300s
kubectl wait --for=condition=ready pod -l app=redis -n artifact-storage --timeout=300s
kubectl wait --for=condition=ready pod -l app=minio -n artifact-storage --timeout=300s

# Deploy configuration and application
kubectl apply -f configmap.yaml
kubectl apply -f secret.yaml
kubectl apply -f artifactapi.yaml

# Optional: Deploy ingress
kubectl apply -f ingress.yaml

# Check deployment status
kubectl get pods -n artifact-storage
kubectl logs -f deployment/artifactapi -n artifact-storage
```

### Access the API
```bash
# Port-forward to access locally
kubectl port-forward service/artifactapi-service 8000:8000 -n artifact-storage

# Test the API
curl http://localhost:8000/health
curl http://localhost:8000/

# Access artifacts
curl "http://localhost:8000/api/github/gruntwork-io/terragrunt/releases/download/v0.96.1/terragrunt_linux_amd64"
```
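A small readiness check against the port-forwarded service (this assumes the `kubectl port-forward` above is running):

```python
import time
import httpx

for attempt in range(30):
    try:
        if httpx.get("http://localhost:8000/health", timeout=2.0).status_code == 200:
            print("artifactapi is healthy")
            break
    except httpx.HTTPError:
        pass
    time.sleep(2)  # wait and retry while the pods come up
else:
    raise SystemExit("service did not become healthy in time")
```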
### Notes for Production
- Use proper secrets management (e.g., Vault, Sealed Secrets)
- Configure resource limits and requests appropriately
- Set up monitoring and alerting
- Use external managed databases for production workloads
- Configure backup strategies for persistent volumes
- Set up proper TLS certificates for ingress
- Consider using StatefulSets for databases with persistent storage
docker-compose.yml (new file)
@@ -0,0 +1,85 @@
version: '3.8'

services:
  artifactapi:
    build:
      context: .
      dockerfile: Dockerfile
      no_cache: true
    ports:
      - "8000:8000"
    environment:
      - DBHOST=postgres
      - DBPORT=5432
      - DBUSER=artifacts
      - DBPASS=artifacts123
      - DBNAME=artifacts
      - REDIS_URL=redis://redis:6379
      - MINIO_ENDPOINT=minio:9000
      - MINIO_ACCESS_KEY=minioadmin
      - MINIO_SECRET_KEY=minioadmin
      - MINIO_BUCKET=artifacts
      - MINIO_SECURE=false
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      minio:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  minio:
    image: minio/minio:latest
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    command: server /data --console-address ":9001"
    volumes:
      - minio_data:/data
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 30s
      timeout: 20s
      retries: 3

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    command: redis-server --save 20 1
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3

  postgres:
    image: postgres:15-alpine
    ports:
      - "5432:5432"
    environment:
      POSTGRES_DB: artifacts
      POSTGRES_USER: artifacts
      POSTGRES_PASSWORD: artifacts123
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U artifacts -d artifacts"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  minio_data:
  redis_data:
  postgres_data:
pyproject.toml (new file)
@@ -0,0 +1,43 @@
[project]
name = "artifactapi"
version = "2.0.0"
description = "Generic artifact caching system with support for various package managers"

dependencies = [
    "fastapi>=0.104.0",
    "uvicorn[standard]>=0.24.0",
    "httpx>=0.25.0",
    "redis>=5.0.0",
    "boto3>=1.29.0",
    "psycopg2-binary>=2.9.0",
    "pyyaml>=6.0",
    "lxml>=4.9.0",
    "prometheus-client>=0.19.0",
    "python-multipart>=0.0.6",
]
requires-python = ">=3.11"
readme = "README.md"
license = {text = "MIT"}

[project.scripts]
artifactapi = "artifactapi.main:main"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["src/artifactapi"]

[project.optional-dependencies]
dev = [
    "pytest>=7.4.0",
    "pytest-asyncio>=0.21.0",
    "black>=23.9.0",
    "isort>=5.12.0",
    "mypy>=1.6.0",
    "ruff>=0.1.0",
]
src/artifactapi/__init__.py (new file)
@@ -0,0 +1 @@
# Artifact API package
src/artifactapi/cache.py (new file)
@@ -0,0 +1,69 @@
import time
import hashlib
import redis


class RedisCache:
    def __init__(self, redis_url: str):
        self.redis_url = redis_url

        try:
            self.client = redis.from_url(self.redis_url, decode_responses=True)
            # Test connection
            self.client.ping()
            self.available = True
        except Exception as e:
            print(f"Redis not available: {e}")
            self.client = None
            self.available = False

    def is_index_file(self, file_path: str) -> bool:
        """Check if the file is an index file that should have TTL"""
        return (
            file_path.endswith("APKINDEX.tar.gz")
            or file_path.endswith("Packages.gz")
            or file_path.endswith("repomd.xml")
            or (
                "repodata/" in file_path
                and file_path.endswith((".xml", ".xml.gz", ".xml.bz2", ".xml.xz"))
            )
        )

    def get_index_cache_key(self, remote_name: str, path: str) -> str:
        """Generate cache key for index files"""
        return f"index:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"

    def is_index_valid(
        self, remote_name: str, path: str, ttl_override: int = None
    ) -> bool:
        """Check if index file is still valid (not expired)"""
        if not self.available:
            return False

        try:
            key = self.get_index_cache_key(remote_name, path)
            return self.client.exists(key) > 0
        except Exception:
            return False

    def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
        """Mark index file as cached with TTL"""
        if not self.available:
            return

        try:
            key = self.get_index_cache_key(remote_name, path)
            self.client.setex(key, ttl, str(int(time.time())))
        except Exception:
            pass

    def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
        """Remove expired index from S3 storage"""
        if not self.available:
            return

        try:
            # Get the S3 key and remove it
            s3_key = storage.get_object_key_from_path(remote_name, path)
            if storage.exists(s3_key):
                storage.client.delete_object(Bucket=storage.bucket, Key=s3_key)
        except Exception:
            pass
src/artifactapi/config.py (new file)
@@ -0,0 +1,120 @@
import os
import json
import yaml
from typing import Optional


class ConfigManager:
    def __init__(self, config_file: str = "remotes.yaml"):
        self.config_file = config_file
        self._last_modified = 0
        self.config = self._load_config()

    def _load_config(self) -> dict:
        try:
            with open(self.config_file, "r") as f:
                if self.config_file.endswith(".yaml") or self.config_file.endswith(
                    ".yml"
                ):
                    return yaml.safe_load(f)
                else:
                    return json.load(f)
        except FileNotFoundError:
            return {"remotes": {}}

    def _check_reload(self) -> None:
        """Check if config file has been modified and reload if needed"""
        try:
            import os

            current_modified = os.path.getmtime(self.config_file)
            if current_modified > self._last_modified:
                self._last_modified = current_modified
                self.config = self._load_config()
                print(f"Config reloaded from {self.config_file}")
        except OSError:
            pass

    def get_remote_config(self, remote_name: str) -> Optional[dict]:
        self._check_reload()
        return self.config.get("remotes", {}).get(remote_name)

    def get_repository_patterns(self, remote_name: str, repo_path: str) -> list:
        remote_config = self.get_remote_config(remote_name)
        if not remote_config:
            return []

        repositories = remote_config.get("repositories", {})

        # Handle both dict (GitHub style) and list (Alpine style) repositories
        if isinstance(repositories, dict):
            repo_config = repositories.get(repo_path)
            if repo_config:
                patterns = repo_config.get("include_patterns", [])
            else:
                patterns = remote_config.get("include_patterns", [])
        elif isinstance(repositories, list):
            # For Alpine, repositories is just a list of allowed repo names
            # Pattern matching is handled by the main include_patterns
            patterns = remote_config.get("include_patterns", [])
        else:
            patterns = remote_config.get("include_patterns", [])

        return patterns

    def get_s3_config(self) -> dict:
        """Get S3 configuration from environment variables"""
        endpoint = os.getenv("MINIO_ENDPOINT")
        access_key = os.getenv("MINIO_ACCESS_KEY")
        secret_key = os.getenv("MINIO_SECRET_KEY")
        bucket = os.getenv("MINIO_BUCKET")

        if not endpoint:
            raise ValueError("MINIO_ENDPOINT environment variable is required")
        if not access_key:
            raise ValueError("MINIO_ACCESS_KEY environment variable is required")
        if not secret_key:
            raise ValueError("MINIO_SECRET_KEY environment variable is required")
        if not bucket:
            raise ValueError("MINIO_BUCKET environment variable is required")

        return {
            "endpoint": endpoint,
            "access_key": access_key,
            "secret_key": secret_key,
            "bucket": bucket,
            "secure": os.getenv("MINIO_SECURE", "false").lower() == "true",
        }

    def get_redis_config(self) -> dict:
        """Get Redis configuration from environment variables"""
        redis_url = os.getenv("REDIS_URL")
        if not redis_url:
            raise ValueError("REDIS_URL environment variable is required")

        return {
            "url": redis_url
        }

    def get_database_config(self) -> dict:
        """Get database configuration from environment variables"""
        db_host = os.getenv("DBHOST")
        db_port = os.getenv("DBPORT")
        db_user = os.getenv("DBUSER")
        db_pass = os.getenv("DBPASS")
        db_name = os.getenv("DBNAME")

        if not all([db_host, db_port, db_user, db_pass, db_name]):
            missing = [
                var
                for var, val in [
                    ("DBHOST", db_host),
                    ("DBPORT", db_port),
                    ("DBUSER", db_user),
                    ("DBPASS", db_pass),
                    ("DBNAME", db_name),
                ]
                if not val
            ]
            raise ValueError(
                f"All database environment variables are required: {', '.join(missing)}"
            )

        db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}"
        return {"url": db_url}

    def get_cache_config(self, remote_name: str) -> dict:
        """Get cache configuration for a specific remote"""
        remote_config = self.get_remote_config(remote_name)
        if not remote_config:
            return {}

        return remote_config.get("cache", {})
src/artifactapi/database.py (new file)
@@ -0,0 +1,282 @@
import os
from typing import Optional
import psycopg2
from psycopg2.extras import RealDictCursor


class DatabaseManager:
    def __init__(self, db_url: str):
        self.db_url = db_url
        self.available = False
        self._init_database()

    def _init_database(self):
        """Initialize database connection and create schema if needed"""
        try:
            self.connection = psycopg2.connect(self.db_url)
            self.connection.autocommit = True
            self._create_schema()
            self.available = True
            print("Database connection established")
        except Exception as e:
            print(f"Database not available: {e}")
            self.available = False

    def _create_schema(self):
        """Create tables if they don't exist"""
        try:
            with self.connection.cursor() as cursor:
                # Create table to map S3 keys to remote names
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS artifact_mappings (
                        id SERIAL PRIMARY KEY,
                        s3_key VARCHAR(255) UNIQUE NOT NULL,
                        remote_name VARCHAR(100) NOT NULL,
                        file_path TEXT NOT NULL,
                        size_bytes BIGINT,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                """)

                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS local_files (
                        id SERIAL PRIMARY KEY,
                        repository_name VARCHAR(100) NOT NULL,
                        file_path TEXT NOT NULL,
                        s3_key VARCHAR(255) UNIQUE NOT NULL,
                        size_bytes BIGINT NOT NULL,
                        sha256_sum VARCHAR(64) NOT NULL,
                        content_type VARCHAR(100),
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        uploaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        UNIQUE(repository_name, file_path)
                    )
                """)

                # Create indexes separately
                cursor.execute(
                    "CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)"
                )
                cursor.execute(
                    "CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)"
                )
                cursor.execute(
                    "CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)"
                )
                cursor.execute(
                    "CREATE INDEX IF NOT EXISTS idx_local_s3_key ON local_files (s3_key)"
                )
            print("Database schema initialized")
        except Exception as e:
            print(f"Error creating schema: {e}")

    def record_artifact_mapping(
        self, s3_key: str, remote_name: str, file_path: str, size_bytes: int
    ):
        """Record mapping between S3 key and remote"""
        if not self.available:
            return

        try:
            with self.connection.cursor() as cursor:
                cursor.execute(
                    """
                    INSERT INTO artifact_mappings (s3_key, remote_name, file_path, size_bytes)
                    VALUES (%s, %s, %s, %s)
                    ON CONFLICT (s3_key)
                    DO UPDATE SET
                        remote_name = EXCLUDED.remote_name,
                        file_path = EXCLUDED.file_path,
                        size_bytes = EXCLUDED.size_bytes
                    """,
                    (s3_key, remote_name, file_path, size_bytes),
                )
        except Exception as e:
            print(f"Error recording artifact mapping: {e}")

    def get_storage_by_remote(self) -> dict[str, int]:
        """Get storage size breakdown by remote from database"""
        if not self.available:
            return {}

        try:
            with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
                cursor.execute("""
                    SELECT remote_name, SUM(size_bytes) as total_size
                    FROM artifact_mappings
                    GROUP BY remote_name
                """)
                results = cursor.fetchall()
                return {row["remote_name"]: row["total_size"] or 0 for row in results}
        except Exception as e:
            print(f"Error getting storage by remote: {e}")
            return {}

    def get_remote_for_s3_key(self, s3_key: str) -> Optional[str]:
        """Get remote name for given S3 key"""
        if not self.available:
            return None

        try:
            with self.connection.cursor() as cursor:
                cursor.execute(
                    "SELECT remote_name FROM artifact_mappings WHERE s3_key = %s",
                    (s3_key,),
                )
                result = cursor.fetchone()
                return result[0] if result else None
        except Exception as e:
            print(f"Error getting remote for S3 key: {e}")
            return None

    def add_local_file(
        self,
        repository_name: str,
        file_path: str,
        s3_key: str,
        size_bytes: int,
        sha256_sum: str,
        content_type: str = None,
    ):
        """Add a file to local repository"""
        if not self.available:
            return False

        try:
            with self.connection.cursor() as cursor:
                cursor.execute(
                    """
                    INSERT INTO local_files (repository_name, file_path, s3_key, size_bytes, sha256_sum, content_type)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    """,
                    (
                        repository_name,
                        file_path,
                        s3_key,
                        size_bytes,
                        sha256_sum,
                        content_type,
                    ),
                )
                self.connection.commit()
                return True
        except Exception as e:
            print(f"Error adding local file: {e}")
            return False

    def get_local_file_metadata(self, repository_name: str, file_path: str):
        """Get metadata for a local file"""
        if not self.available:
            return None

        try:
            with self.connection.cursor() as cursor:
                cursor.execute(
                    """
                    SELECT repository_name, file_path, s3_key, size_bytes, sha256_sum, content_type, created_at, uploaded_at
                    FROM local_files
                    WHERE repository_name = %s AND file_path = %s
                    """,
                    (repository_name, file_path),
                )
                result = cursor.fetchone()
                if result:
                    return {
                        "repository_name": result[0],
                        "file_path": result[1],
                        "s3_key": result[2],
                        "size_bytes": result[3],
                        "sha256_sum": result[4],
                        "content_type": result[5],
                        "created_at": result[6],
                        "uploaded_at": result[7],
                    }
                return None
        except Exception as e:
            print(f"Error getting local file metadata: {e}")
            return None

    def list_local_files(self, repository_name: str, prefix: str = ""):
        """List files in local repository with optional path prefix"""
        if not self.available:
            return []

        try:
            with self.connection.cursor() as cursor:
                if prefix:
                    cursor.execute(
                        """
                        SELECT file_path, size_bytes, sha256_sum, content_type, created_at, uploaded_at
                        FROM local_files
                        WHERE repository_name = %s AND file_path LIKE %s
                        ORDER BY file_path
                        """,
                        (repository_name, f"{prefix}%"),
                    )
                else:
                    cursor.execute(
                        """
                        SELECT file_path, size_bytes, sha256_sum, content_type, created_at, uploaded_at
                        FROM local_files
                        WHERE repository_name = %s
                        ORDER BY file_path
                        """,
                        (repository_name,),
                    )

                results = cursor.fetchall()
                return [
                    {
                        "file_path": result[0],
                        "size_bytes": result[1],
                        "sha256_sum": result[2],
                        "content_type": result[3],
                        "created_at": result[4],
                        "uploaded_at": result[5],
                    }
                    for result in results
                ]
        except Exception as e:
            print(f"Error listing local files: {e}")
            return []

    def delete_local_file(self, repository_name: str, file_path: str):
        """Delete a file from local repository"""
        if not self.available:
            return False

        try:
            with self.connection.cursor() as cursor:
                cursor.execute(
                    """
                    DELETE FROM local_files
                    WHERE repository_name = %s AND file_path = %s
                    RETURNING s3_key
                    """,
                    (repository_name, file_path),
                )
                result = cursor.fetchone()
                self.connection.commit()
                return result[0] if result else None
        except Exception as e:
            print(f"Error deleting local file: {e}")
            return None

    def file_exists(self, repository_name: str, file_path: str):
        """Check if file exists in local repository"""
        if not self.available:
            return False

        try:
            with self.connection.cursor() as cursor:
                cursor.execute(
                    """
                    SELECT 1 FROM local_files
                    WHERE repository_name = %s AND file_path = %s
                    """,
                    (repository_name, file_path),
                )
                return cursor.fetchone() is not None
        except Exception as e:
            print(f"Error checking file existence: {e}")
            return False
580
src/artifactapi/main.py
Normal file
580
src/artifactapi/main.py
Normal file
@ -0,0 +1,580 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
import hashlib
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
import httpx
|
||||||
|
from fastapi import FastAPI, HTTPException, Response, Query, File, UploadFile
|
||||||
|
from fastapi.responses import PlainTextResponse, JSONResponse
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
||||||
|
|
||||||
|
from .config import ConfigManager
|
||||||
|
from .database import DatabaseManager
|
||||||
|
from .storage import S3Storage
|
||||||
|
from .cache import RedisCache
|
||||||
|
from .metrics import MetricsManager
|
||||||
|
|
||||||
|
|
||||||
|
class ArtifactRequest(BaseModel):
|
||||||
|
remote: str
|
||||||
|
include_pattern: str
|
||||||
|
|
||||||
|
|
||||||
|
app = FastAPI(title="Artifact Storage API", version="2.0.0")
|
||||||
|
|
||||||
|
# Initialize components using config
|
||||||
|
config = ConfigManager("remotes.yaml")
|
||||||
|
|
||||||
|
# Get configurations
|
||||||
|
s3_config = config.get_s3_config()
|
||||||
|
redis_config = config.get_redis_config()
|
||||||
|
db_config = config.get_database_config()
|
||||||
|
|
||||||
|
# Initialize services
|
||||||
|
storage = S3Storage(**s3_config)
|
||||||
|
cache = RedisCache(redis_config["url"])
|
||||||
|
database = DatabaseManager(db_config["url"])
|
||||||
|
metrics = MetricsManager(cache, database)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
def read_root():
|
||||||
|
config._check_reload()
|
||||||
|
return {
|
||||||
|
"message": "Artifact Storage API",
|
||||||
|
"version": "2.0.0",
|
||||||
|
"remotes": list(config.config.get("remotes", {}).keys()),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
def health_check():
|
||||||
|
return {"status": "healthy"}
|
||||||
|
|
||||||
|
|
||||||
|
async def construct_remote_url(remote_name: str, path: str) -> str:
|
||||||
|
remote_config = config.get_remote_config(remote_name)
|
||||||
|
if not remote_config:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||||
|
)
|
||||||
|
|
||||||
|
base_url = remote_config.get("base_url")
|
||||||
|
if not base_url:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500, detail=f"No base_url configured for remote '{remote_name}'"
|
||||||
|
)
|
||||||
|
|
||||||
|
return f"{base_url}/{path}"
|
||||||
|
|
||||||
|
|
||||||
|
async def check_artifact_patterns(
|
||||||
|
remote_name: str, repo_path: str, file_path: str, full_path: str
|
||||||
|
) -> bool:
|
||||||
|
# First check if this is an index file - always allow index files
|
||||||
|
if cache.is_index_file(file_path) or cache.is_index_file(full_path):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Then check basic include patterns
|
||||||
|
patterns = config.get_repository_patterns(remote_name, repo_path)
|
||||||
|
if not patterns:
|
||||||
|
return True # Allow all if no patterns configured
|
||||||
|
|
||||||
|
pattern_matched = False
|
||||||
|
for pattern in patterns:
|
||||||
|
# Check both file_path and full_path to handle different pattern types
|
||||||
|
if re.search(pattern, file_path) or re.search(pattern, full_path):
|
||||||
|
pattern_matched = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not pattern_matched:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# All remotes now use pattern-based filtering only - no additional checks needed
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
|
||||||
|
# Check if using URL-based key or path-based key
|
||||||
|
if url.startswith("http"):
|
||||||
|
key = storage.get_object_key(url)
|
||||||
|
else:
|
||||||
|
key = storage.get_object_key_from_path(remote_name, path)
|
||||||
|
|
||||||
|
if storage.exists(key):
|
||||||
|
return {
|
||||||
|
"url": url,
|
||||||
|
"cached_url": storage.get_url(key),
|
||||||
|
"status": "already_cached",
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||||
|
response = await client.get(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
storage_path = storage.upload(key, response.content)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"url": url,
|
||||||
|
"cached_url": storage.get_url(key),
|
||||||
|
"storage_path": storage_path,
|
||||||
|
"size": len(response.content),
|
||||||
|
"status": "cached",
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return {"url": url, "status": "error", "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/v1/remote/{remote_name}/{path:path}")
|
||||||
async def get_artifact(remote_name: str, path: str):
    # Check if remote is configured
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(
            status_code=404, detail=f"Remote '{remote_name}' not configured"
        )

    # Check if this is a local repository
    if remote_config.get("type") == "local":
        # Handle local repository download
        metadata = database.get_local_file_metadata(remote_name, path)
        if not metadata:
            raise HTTPException(status_code=404, detail="File not found")

        # Get file from S3
        content = storage.download_object(metadata["s3_key"])
        if content is None:
            raise HTTPException(status_code=500, detail="File not accessible")

        # Determine content type
        content_type = metadata.get("content_type", "application/octet-stream")

        return Response(
            content=content,
            media_type=content_type,
            headers={
                "Content-Disposition": f"attachment; filename={os.path.basename(path)}"
            },
        )

    # Extract repository path for pattern checking
    path_parts = path.split("/")
    if len(path_parts) >= 2:
        repo_path = f"{path_parts[0]}/{path_parts[1]}"
        file_path = "/".join(path_parts[2:])
    else:
        repo_path = path
        file_path = path

    # Check if artifact matches configured patterns
    if not await check_artifact_patterns(remote_name, repo_path, file_path, path):
        raise HTTPException(
            status_code=403, detail="Artifact not allowed by configuration patterns"
        )

    # Construct the remote URL
    remote_url = await construct_remote_url(remote_name, path)

    # Check if artifact is already cached (try both URL and path-based keys)
    url_key = storage.get_object_key(remote_url)
    path_key = storage.get_object_key_from_path(remote_name, path)

    cached_key = None
    if storage.exists(url_key):
        cached_key = url_key
    elif storage.exists(path_key):
        cached_key = path_key

    # For index files, check Redis TTL validity
    filename = os.path.basename(path)
    is_index = cache.is_index_file(filename)

    if cached_key and is_index:
        # Index file exists, but check if it's still valid
        if not cache.is_index_valid(remote_name, path):
            # Index has expired, remove it from S3
            cache.cleanup_expired_index(storage, remote_name, path)
            cached_key = None  # Force re-download

    if cached_key:
        # Return cached artifact
        try:
            artifact_data = storage.download_object(cached_key)
            filename = os.path.basename(path)

            # Determine content type based on file extension
            content_type = "application/octet-stream"
            if filename.endswith(".tar.gz"):
                content_type = "application/gzip"
            elif filename.endswith(".zip"):
                content_type = "application/zip"
            elif filename.endswith(".exe"):
                content_type = "application/x-msdownload"
            elif filename.endswith(".rpm"):
                content_type = "application/x-rpm"
            elif filename.endswith(".xml"):
                content_type = "application/xml"
            elif filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
                content_type = "application/gzip"

            # Record cache hit metrics
            metrics.record_cache_hit(remote_name, len(artifact_data))

            # Record artifact mapping in database if not already recorded
            database.record_artifact_mapping(
                cached_key, remote_name, path, len(artifact_data)
            )

            return Response(
                content=artifact_data,
                media_type=content_type,
                headers={
                    "Content-Disposition": f"attachment; filename={filename}",
                    "X-Artifact-Source": "cache",
                    "X-Artifact-Size": str(len(artifact_data)),
                },
            )
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Error retrieving cached artifact: {str(e)}"
            )

    # Artifact not cached, cache it first
    result = await cache_single_artifact(remote_url, remote_name, path)

    if result["status"] == "error":
        raise HTTPException(
            status_code=502, detail=f"Failed to fetch artifact: {result['error']}"
        )

    # Mark index files as cached in Redis if this was a new download
    if result["status"] == "cached" and is_index:
        # Get TTL from remote config
        cache_config = config.get_cache_config(remote_name)
        index_ttl = cache_config.get("index_ttl", 300)  # Default 5 minutes
        cache.mark_index_cached(remote_name, path, index_ttl)

    # Now return the cached artifact
    try:
        cache_key = storage.get_object_key(remote_url)
        artifact_data = storage.download_object(cache_key)
        filename = os.path.basename(path)

        content_type = "application/octet-stream"
        if filename.endswith(".tar.gz"):
            content_type = "application/gzip"
        elif filename.endswith(".zip"):
            content_type = "application/zip"
        elif filename.endswith(".exe"):
            content_type = "application/x-msdownload"
        elif filename.endswith(".rpm"):
            content_type = "application/x-rpm"
        elif filename.endswith(".xml"):
            content_type = "application/xml"
        elif filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
            content_type = "application/gzip"

        # Record cache miss metrics
        metrics.record_cache_miss(remote_name, len(artifact_data))

        # Record artifact mapping in database
        cache_key = storage.get_object_key(remote_url)
        database.record_artifact_mapping(
            cache_key, remote_name, path, len(artifact_data)
        )

        return Response(
            content=artifact_data,
            media_type=content_type,
            headers={
                "Content-Disposition": f"attachment; filename={filename}",
                "X-Artifact-Source": "remote",
                "X-Artifact-Size": str(len(artifact_data)),
            },
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error serving artifact: {str(e)}")

async def discover_artifacts(remote: str, include_pattern: str) -> list[str]:
    if "github.com" in remote:
        return await discover_github_releases(remote, include_pattern)
    else:
        raise HTTPException(status_code=400, detail=f"Unsupported remote: {remote}")


async def discover_github_releases(remote: str, include_pattern: str) -> list[str]:
    match = re.match(r"github\.com/([^/]+)/([^/]+)", remote)
    if not match:
        raise HTTPException(status_code=400, detail="Invalid GitHub remote format")

    owner, repo = match.groups()

    async with httpx.AsyncClient(follow_redirects=True) as client:
        response = await client.get(
            f"https://api.github.com/repos/{owner}/{repo}/releases"
        )

        if response.status_code != 200:
            raise HTTPException(
                status_code=response.status_code,
                detail=f"Failed to fetch releases: {response.text}",
            )

        releases = response.json()

        matching_urls = []
        pattern = include_pattern.replace("*", ".*")
        regex = re.compile(pattern)

        for release in releases:
            for asset in release.get("assets", []):
                download_url = asset["browser_download_url"]
                if regex.search(download_url):
                    matching_urls.append(download_url)

        return matching_urls

@app.put("/api/v1/remote/{remote_name}/{path:path}")
async def upload_file(remote_name: str, path: str, file: UploadFile = File(...)):
    """Upload a file to local repository"""
    # Check if remote is configured and is local
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(
            status_code=404, detail=f"Remote '{remote_name}' not configured"
        )

    if remote_config.get("type") != "local":
        raise HTTPException(
            status_code=400, detail="Upload only supported for local repositories"
        )

    try:
        # Read file content
        content = await file.read()

        # Calculate SHA256
        sha256_sum = hashlib.sha256(content).hexdigest()

        # Check if file already exists (prevent overwrite)
        if database.file_exists(remote_name, path):
            raise HTTPException(status_code=409, detail="File already exists")

        # Generate S3 key
        s3_key = f"local/{remote_name}/{path}"

        # Determine content type
        content_type = file.content_type or "application/octet-stream"

        # Upload to S3
        try:
            storage.upload(s3_key, content)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Upload failed: {e}")

        # Add to database
        success = database.add_local_file(
            repository_name=remote_name,
            file_path=path,
            s3_key=s3_key,
            size_bytes=len(content),
            sha256_sum=sha256_sum,
            content_type=content_type,
        )

        if not success:
            # Clean up S3 if database insert failed
            storage.delete_object(s3_key)
            raise HTTPException(status_code=500, detail="Failed to save file metadata")

        return JSONResponse(
            {
                "message": "File uploaded successfully",
                "file_path": path,
                "size_bytes": len(content),
                "sha256_sum": sha256_sum,
                "content_type": content_type,
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")

@app.head("/api/v1/remote/{remote_name}/{path:path}")
def check_file_exists(remote_name: str, path: str):
    """Check if file exists (for CI jobs) - supports local repositories only"""
    # Check if remote is configured
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(
            status_code=404, detail=f"Remote '{remote_name}' not configured"
        )

    # Handle local repository
    if remote_config.get("type") == "local":
        try:
            metadata = database.get_local_file_metadata(remote_name, path)
            if not metadata:
                raise HTTPException(status_code=404, detail="File not found")

            return Response(
                headers={
                    "Content-Length": str(metadata["size_bytes"]),
                    "Content-Type": metadata.get(
                        "content_type", "application/octet-stream"
                    ),
                    "X-SHA256": metadata["sha256_sum"],
                    "X-Created-At": metadata["created_at"].isoformat()
                    if metadata["created_at"]
                    else "",
                    "X-Uploaded-At": metadata["uploaded_at"].isoformat()
                    if metadata["uploaded_at"]
                    else "",
                }
            )
        except HTTPException:
            raise
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")
    else:
        # For remote repositories, just return 405 Method Not Allowed
        raise HTTPException(
            status_code=405, detail="HEAD method only supported for local repositories"
        )

@app.delete("/api/v1/remote/{remote_name}/{path:path}")
def delete_file(remote_name: str, path: str):
    """Delete a file from local repository"""
    # Check if remote is configured and is local
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(
            status_code=404, detail=f"Remote '{remote_name}' not configured"
        )

    if remote_config.get("type") != "local":
        raise HTTPException(
            status_code=400, detail="Delete only supported for local repositories"
        )

    try:
        # Get S3 key before deleting from database
        s3_key = database.delete_local_file(remote_name, path)
        if not s3_key:
            raise HTTPException(status_code=404, detail="File not found")

        # Delete from S3
        if not storage.delete_object(s3_key):
            # File was deleted from database but not from S3 - log warning but continue
            print(f"Warning: Failed to delete S3 object {s3_key}")

        return JSONResponse({"message": "File deleted successfully"})
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Delete failed: {str(e)}")

@app.post("/api/v1/artifacts/cache")
async def cache_artifact(request: ArtifactRequest) -> Dict[str, Any]:
    try:
        matching_urls = await discover_artifacts(
            request.remote, request.include_pattern
        )

        if not matching_urls:
            return {
                "message": "No matching artifacts found",
                "cached_count": 0,
                "artifacts": [],
            }

        cached_artifacts = []

        for url in matching_urls:
            result = await cache_single_artifact(url, "", "")
            cached_artifacts.append(result)

        cached_count = sum(
            1
            for artifact in cached_artifacts
            if artifact["status"] in ["cached", "already_cached"]
        )

        return {
            "message": f"Processed {len(matching_urls)} artifacts, {cached_count} successfully cached",
            "cached_count": cached_count,
            "artifacts": cached_artifacts,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/api/v1/artifacts/{remote:path}")
async def list_cached_artifacts(
    remote: str, include_pattern: str = ".*"
) -> Dict[str, Any]:
    try:
        matching_urls = await discover_artifacts(remote, include_pattern)

        cached_artifacts = []
        for url in matching_urls:
            key = storage.get_object_key(url)
            if storage.exists(key):
                cached_artifacts.append(
                    {"url": url, "cached_url": storage.get_url(key), "key": key}
                )

        return {
            "remote": remote,
            "pattern": include_pattern,
            "total_found": len(matching_urls),
            "cached_count": len(cached_artifacts),
            "artifacts": cached_artifacts,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/metrics")
def get_metrics(
    json: Optional[bool] = Query(
        False, description="Return JSON format instead of Prometheus"
    ),
):
    """Get comprehensive metrics about the artifact storage system"""
    config._check_reload()

    if json:
        # Return JSON format
        return metrics.get_metrics(storage, config)
    else:
        # Return Prometheus format
        metrics.get_metrics(storage, config)  # Update gauges
        prometheus_data = generate_latest().decode("utf-8")
        return PlainTextResponse(prometheus_data, media_type=CONTENT_TYPE_LATEST)

@app.get("/config")
def get_config():
    return config.config


def main():
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)


if __name__ == "__main__":
    main()
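For reference, the endpoints above can be exercised end to end once the stack is up. The sketch below is illustrative only and not part of this commit: it assumes the API listens on localhost:8000 and that remotes.yaml defines a remote named "github" plus a local-type remote named "internal"; the exact GET path layout depends on construct_remote_url, so adjust it to your configuration.

# Illustrative client sketch (assumed names: localhost:8000, "github" remote,
# local-type "internal" remote; paths are examples, not taken from this commit).
import httpx

BASE = "http://localhost:8000"

# First GET populates the S3 cache; later GETs report X-Artifact-Source: cache.
r = httpx.get(
    f"{BASE}/api/v1/remote/github/astral-sh/uv/releases/download/0.9.21/"
    "uv-x86_64-unknown-linux-musl.tar.gz",
    timeout=120,
)
r.raise_for_status()
print(r.headers.get("X-Artifact-Source"), r.headers.get("X-Artifact-Size"))

# Uploads are only accepted for remotes configured with type: local.
with open("my-tool-1.0.0.tar.gz", "rb") as f:
    up = httpx.put(
        f"{BASE}/api/v1/remote/internal/my-tool/1.0.0/my-tool-1.0.0.tar.gz",
        files={"file": f},
        timeout=60,
    )
print(up.status_code, up.json())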
229
src/artifactapi/metrics.py
Normal file
@ -0,0 +1,229 @@
from datetime import datetime
from typing import Dict, Any
from prometheus_client import Counter, Gauge


# Prometheus metrics
request_counter = Counter(
    "artifact_requests_total", "Total artifact requests", ["remote", "status"]
)
cache_hit_counter = Counter("artifact_cache_hits_total", "Total cache hits", ["remote"])
cache_miss_counter = Counter(
    "artifact_cache_misses_total", "Total cache misses", ["remote"]
)
bandwidth_saved_counter = Counter(
    "artifact_bandwidth_saved_bytes_total", "Total bandwidth saved", ["remote"]
)
storage_size_gauge = Gauge(
    "artifact_storage_size_bytes", "Storage size by remote", ["remote"]
)
redis_keys_gauge = Gauge("artifact_redis_keys_total", "Total Redis keys")

class MetricsManager:
    def __init__(self, redis_client=None, database_manager=None):
        self.redis_client = redis_client
        self.database_manager = database_manager
        self.start_time = datetime.now()

    def record_cache_hit(self, remote_name: str, size_bytes: int):
        """Record a cache hit with size for bandwidth calculation"""
        # Update Prometheus metrics
        request_counter.labels(remote=remote_name, status="cache_hit").inc()
        cache_hit_counter.labels(remote=remote_name).inc()
        bandwidth_saved_counter.labels(remote=remote_name).inc(size_bytes)

        # Update Redis for persistence across instances
        if self.redis_client and self.redis_client.available:
            try:
                # Increment global counters
                self.redis_client.client.incr("metrics:cache_hits")
                self.redis_client.client.incr("metrics:total_requests")
                self.redis_client.client.incrby("metrics:bandwidth_saved", size_bytes)

                # Increment per-remote counters
                self.redis_client.client.incr(f"metrics:cache_hits:{remote_name}")
                self.redis_client.client.incr(f"metrics:total_requests:{remote_name}")
                self.redis_client.client.incrby(
                    f"metrics:bandwidth_saved:{remote_name}", size_bytes
                )
            except Exception:
                pass
    def record_cache_miss(self, remote_name: str, size_bytes: int):
        """Record a cache miss (new download)"""
        # Update Prometheus metrics
        request_counter.labels(remote=remote_name, status="cache_miss").inc()
        cache_miss_counter.labels(remote=remote_name).inc()

        # Update Redis for persistence across instances
        if self.redis_client and self.redis_client.available:
            try:
                # Increment global counters
                self.redis_client.client.incr("metrics:cache_misses")
                self.redis_client.client.incr("metrics:total_requests")

                # Increment per-remote counters
                self.redis_client.client.incr(f"metrics:cache_misses:{remote_name}")
                self.redis_client.client.incr(f"metrics:total_requests:{remote_name}")
            except Exception:
                pass

    def get_redis_key_count(self) -> int:
        """Get total number of keys in Redis"""
        if self.redis_client and self.redis_client.available:
            try:
                return self.redis_client.client.dbsize()
            except Exception:
                return 0
        return 0
    def get_s3_total_size(self, storage) -> int:
        """Get total size of all objects in S3 bucket"""
        try:
            total_size = 0
            paginator = storage.client.get_paginator("list_objects_v2")
            for page in paginator.paginate(Bucket=storage.bucket):
                if "Contents" in page:
                    for obj in page["Contents"]:
                        total_size += obj["Size"]
            return total_size
        except Exception:
            return 0
    def get_s3_size_by_remote(self, storage, config_manager) -> Dict[str, int]:
        """Get size of stored data per remote using database mappings"""
        if self.database_manager and self.database_manager.available:
            # Get from database if available
            db_sizes = self.database_manager.get_storage_by_remote()
            if db_sizes:
                # Initialize all configured remotes to 0
                remote_sizes = {}
                for remote in config_manager.config.get("remotes", {}).keys():
                    remote_sizes[remote] = db_sizes.get(remote, 0)

                # Update Prometheus gauges
                for remote, size in remote_sizes.items():
                    storage_size_gauge.labels(remote=remote).set(size)

                return remote_sizes

        # Fallback to S3 scanning if database not available
        try:
            remote_sizes = {}
            remotes = config_manager.config.get("remotes", {}).keys()

            # Initialize all remotes to 0
            for remote in remotes:
                remote_sizes[remote] = 0

            paginator = storage.client.get_paginator("list_objects_v2")
            for page in paginator.paginate(Bucket=storage.bucket):
                if "Contents" in page:
                    for obj in page["Contents"]:
                        key = obj["Key"]
                        # Try to map from database first
                        remote = None
                        if self.database_manager:
                            remote = self.database_manager.get_remote_for_s3_key(key)

                        # Fallback to key parsing
                        if not remote:
                            remote = key.split("/")[0] if "/" in key else "unknown"

                        if remote in remote_sizes:
                            remote_sizes[remote] += obj["Size"]
                        else:
                            remote_sizes.setdefault("unknown", 0)
                            remote_sizes["unknown"] += obj["Size"]

            # Update Prometheus gauges
            for remote, size in remote_sizes.items():
                if remote != "unknown":  # Don't set gauge for unknown
                    storage_size_gauge.labels(remote=remote).set(size)

            return remote_sizes
        except Exception:
            return {}
    def get_metrics(self, storage, config_manager) -> Dict[str, Any]:
        """Get comprehensive metrics"""
        # Update Redis keys gauge
        redis_key_count = self.get_redis_key_count()
        redis_keys_gauge.set(redis_key_count)

        metrics = {
            "timestamp": datetime.now().isoformat(),
            "uptime_seconds": int((datetime.now() - self.start_time).total_seconds()),
            "redis": {"total_keys": redis_key_count},
            "storage": {
                "total_size_bytes": self.get_s3_total_size(storage),
                "size_by_remote": self.get_s3_size_by_remote(storage, config_manager),
            },
            "requests": {
                "cache_hits": 0,
                "cache_misses": 0,
                "total_requests": 0,
                "cache_hit_ratio": 0.0,
            },
            "bandwidth": {"saved_bytes": 0},
            "per_remote": {},
        }

        if self.redis_client and self.redis_client.available:
            try:
                # Get global metrics
                cache_hits = int(
                    self.redis_client.client.get("metrics:cache_hits") or 0
                )
                cache_misses = int(
                    self.redis_client.client.get("metrics:cache_misses") or 0
                )
                total_requests = cache_hits + cache_misses
                bandwidth_saved = int(
                    self.redis_client.client.get("metrics:bandwidth_saved") or 0
                )

                metrics["requests"]["cache_hits"] = cache_hits
                metrics["requests"]["cache_misses"] = cache_misses
                metrics["requests"]["total_requests"] = total_requests
                metrics["requests"]["cache_hit_ratio"] = (
                    cache_hits / total_requests if total_requests > 0 else 0.0
                )
                metrics["bandwidth"]["saved_bytes"] = bandwidth_saved

                # Get per-remote metrics
                for remote in config_manager.config.get("remotes", {}).keys():
                    remote_cache_hits = int(
                        self.redis_client.client.get(f"metrics:cache_hits:{remote}")
                        or 0
                    )
                    remote_cache_misses = int(
                        self.redis_client.client.get(f"metrics:cache_misses:{remote}")
                        or 0
                    )
                    remote_total = remote_cache_hits + remote_cache_misses
                    remote_bandwidth_saved = int(
                        self.redis_client.client.get(
                            f"metrics:bandwidth_saved:{remote}"
                        )
                        or 0
                    )

                    metrics["per_remote"][remote] = {
                        "cache_hits": remote_cache_hits,
                        "cache_misses": remote_cache_misses,
                        "total_requests": remote_total,
                        "cache_hit_ratio": remote_cache_hits / remote_total
                        if remote_total > 0
                        else 0.0,
                        "bandwidth_saved_bytes": remote_bandwidth_saved,
                        "storage_size_bytes": metrics["storage"]["size_by_remote"].get(
                            remote, 0
                        ),
                    }

            except Exception:
                pass

        return metrics
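MetricsManager keeps its counters in prometheus_client and mirrors them into Redis only when a client is available. A minimal sketch of exercising it in isolation, assuming the module can be imported as src.artifactapi.metrics (the import path is an assumption based on this commit's layout):

# Minimal sketch (assumption: package importable as src.artifactapi.metrics).
from prometheus_client import generate_latest

from src.artifactapi.metrics import MetricsManager

# Without Redis or a database the manager still updates in-process Prometheus metrics;
# the Redis branches in record_cache_hit/record_cache_miss are simply skipped.
manager = MetricsManager(redis_client=None, database_manager=None)
manager.record_cache_hit("github", size_bytes=1_048_576)
manager.record_cache_miss("github", size_bytes=2_097_152)

# The exposition text now includes artifact_cache_hits_total{remote="github"} 1.0
print(generate_latest().decode("utf-8"))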
96
src/artifactapi/storage.py
Normal file
@ -0,0 +1,96 @@
import os
import hashlib
from urllib.parse import urlparse
import boto3
from botocore.exceptions import ClientError
from fastapi import HTTPException


class S3Storage:
    def __init__(
        self,
        endpoint: str,
        access_key: str,
        secret_key: str,
        bucket: str,
        secure: bool = False,
    ):
        self.endpoint = endpoint
        self.access_key = access_key
        self.secret_key = secret_key
        self.bucket = bucket
        self.secure = secure

        self.client = boto3.client(
            "s3",
            endpoint_url=f"http{'s' if self.secure else ''}://{self.endpoint}",
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
        )

        # Try to ensure bucket exists, but don't fail if MinIO isn't ready yet
        try:
            self._ensure_bucket_exists()
        except Exception as e:
            print(f"Warning: Could not ensure bucket exists during initialization: {e}")
            print("Bucket creation will be attempted on first use")
    def _ensure_bucket_exists(self):
        try:
            self.client.head_bucket(Bucket=self.bucket)
        except ClientError:
            self.client.create_bucket(Bucket=self.bucket)

    def get_object_key(self, url: str) -> str:
        url_hash = hashlib.sha256(url.encode()).hexdigest()[:16]
        parsed = urlparse(url)
        filename = os.path.basename(parsed.path)
        return f"{parsed.netloc}/{url_hash}/{filename}"

    def get_object_key_from_path(self, remote_name: str, path: str) -> str:
        # Create a key based on the API path for direct access
        path_hash = hashlib.sha256(path.encode()).hexdigest()[:16]
        filename = os.path.basename(path)
        return f"{remote_name}/{path_hash}/{filename}"

    def exists(self, key: str) -> bool:
        try:
            self._ensure_bucket_exists()
            self.client.head_object(Bucket=self.bucket, Key=key)
            return True
        except ClientError:
            return False

    def upload(self, key: str, data: bytes) -> str:
        self._ensure_bucket_exists()
        self.client.put_object(Bucket=self.bucket, Key=key, Body=data)
        return f"s3://{self.bucket}/{key}"

    def get_url(self, key: str) -> str:
        return f"http://{self.endpoint}/{self.bucket}/{key}"

    def get_presigned_url(self, key: str, expiration: int = 3600) -> str:
        try:
            return self.client.generate_presigned_url(
                "get_object",
                Params={"Bucket": self.bucket, "Key": key},
                ExpiresIn=expiration,
            )
        except Exception:
            return self.get_url(key)

    def download_object(self, key: str) -> bytes:
        try:
            self._ensure_bucket_exists()
            response = self.client.get_object(Bucket=self.bucket, Key=key)
            return response["Body"].read()
        except ClientError:
            raise HTTPException(status_code=404, detail="Artifact not found")

    def delete_object(self, key: str) -> bool:
        try:
            self._ensure_bucket_exists()
            self.client.delete_object(Bucket=self.bucket, Key=key)
            return True
        except ClientError:
            return False
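A short usage sketch of S3Storage against a local MinIO, illustrating the predictable key layout. The endpoint, credentials, bucket name, and import path below are assumptions (MinIO defaults and this commit's package layout), not values taken from the commit itself:

# Usage sketch (assumptions: local MinIO with default credentials, bucket "artifacts",
# module importable as src.artifactapi.storage).
from src.artifactapi.storage import S3Storage

storage = S3Storage(
    endpoint="localhost:9000",
    access_key="minioadmin",
    secret_key="minioadmin",
    bucket="artifacts",
    secure=False,
)

# Keys derived from a source URL follow netloc/<short sha256>/<filename>.
key = storage.get_object_key("https://example.com/downloads/tool-1.0.0.tar.gz")

storage.upload(key, b"example payload")
assert storage.exists(key)
print(storage.get_presigned_url(key, expiration=600))
print(storage.download_object(key))
storage.delete_object(key)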