Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4789635e87 | |||
| ba52fedd27 | |||
| 76633403b2 | |||
| cae3503ac4 | |||
| 3f098df428 | |||
| 64266f40e9 | |||
| be25fc19f7 | |||
| 3bd3ca8b74 | |||
| 373366e695 | |||
| e6d9b175ce | |||
| 0daca40156 | |||
| 0df726467a | |||
| b8bc7f8714 | |||
| 0c780c1bd1 | |||
| 173b5d8b10 | |||
| 3352a3e886 |
@@ -0,0 +1,11 @@
|
|||||||
|
remotes:
|
||||||
|
alpine:
|
||||||
|
base_url: "https://dl-cdn.alpinelinux.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "alpine"
|
||||||
|
description: "Alpine Linux APK package repository"
|
||||||
|
immutable_patterns:
|
||||||
|
- ".*/x86_64/.*\\.apk$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 7200
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
remotes:
|
||||||
|
github:
|
||||||
|
base_url: "https://github.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "generic"
|
||||||
|
description: "GitHub releases and files"
|
||||||
|
immutable_patterns:
|
||||||
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
|
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 0
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
remotes:
|
||||||
|
pypi:
|
||||||
|
base_url: "https://files.pythonhosted.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "pypi"
|
||||||
|
description: "Python Package Index"
|
||||||
|
check_mutable_updates: true
|
||||||
|
quarantine_new: true
|
||||||
|
quarantine_days: 3
|
||||||
|
immutable_patterns:
|
||||||
|
- "packages/.*\\.whl$"
|
||||||
|
- "packages/.*\\.whl\\.metadata$"
|
||||||
|
- "packages/.*\\.tar\\.gz$"
|
||||||
|
- "packages/.*\\.zip$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 600
|
||||||
+1
-1
@@ -10,7 +10,7 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "8000:8000"
|
- "8000:8000"
|
||||||
volumes:
|
volumes:
|
||||||
- ./remotes.yaml:/app/remotes.yaml:ro,z
|
- ./examples/single-file/remotes.yaml:/app/remotes.yaml:ro,z
|
||||||
- ./ca-bundle.pem:/app/ca-bundle.pem:ro,z
|
- ./ca-bundle.pem:/app/ca-bundle.pem:ro,z
|
||||||
environment:
|
environment:
|
||||||
- CONFIG_PATH=/app/remotes.yaml
|
- CONFIG_PATH=/app/remotes.yaml
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
remotes:
|
||||||
|
alpine:
|
||||||
|
base_url: "https://dl-cdn.alpinelinux.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "alpine"
|
||||||
|
description: "Alpine Linux APK package repository"
|
||||||
|
immutable_patterns:
|
||||||
|
- ".*/x86_64/.*\\.apk$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 7200
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
remotes:
|
||||||
|
github:
|
||||||
|
base_url: "https://github.com"
|
||||||
|
type: "remote"
|
||||||
|
package: "generic"
|
||||||
|
description: "GitHub releases and files"
|
||||||
|
immutable_patterns:
|
||||||
|
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||||
|
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 0
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
remotes:
|
||||||
|
pypi:
|
||||||
|
base_url: "https://files.pythonhosted.org"
|
||||||
|
type: "remote"
|
||||||
|
package: "pypi"
|
||||||
|
description: "Python Package Index"
|
||||||
|
check_mutable_updates: true
|
||||||
|
quarantine_new: true
|
||||||
|
quarantine_days: 3
|
||||||
|
immutable_patterns:
|
||||||
|
- "packages/.*\\.whl$"
|
||||||
|
- "packages/.*\\.whl\\.metadata$"
|
||||||
|
- "packages/.*\\.tar\\.gz$"
|
||||||
|
- "packages/.*\\.zip$"
|
||||||
|
cache:
|
||||||
|
immutable_ttl: 0
|
||||||
|
mutable_ttl: 600
|
||||||
@@ -9,6 +9,13 @@
|
|||||||
# immutable_ttl: TTL for immutable files (0 = forever, rarely needed to change).
|
# immutable_ttl: TTL for immutable files (0 = forever, rarely needed to change).
|
||||||
# mutable_ttl: TTL in seconds for mutable files. Omit to use the default (3600).
|
# mutable_ttl: TTL in seconds for mutable files. Omit to use the default (3600).
|
||||||
#
|
#
|
||||||
|
# quarantine_new: Set to true to block immutable artifacts published within the last
|
||||||
|
# quarantine_days days. Requests return 404 until the quarantine period
|
||||||
|
# expires. Fails open when the publish date cannot be determined.
|
||||||
|
# quarantine_days: Number of days to quarantine newly published artifacts (requires
|
||||||
|
# quarantine_new: true). The upstream Last-Modified header is used as
|
||||||
|
# the publish date.
|
||||||
|
#
|
||||||
# WARNING: this file may contain credentials — do not commit real values.
|
# WARNING: this file may contain credentials — do not commit real values.
|
||||||
#
|
#
|
||||||
# Global configuration
|
# Global configuration
|
||||||
@@ -195,15 +202,24 @@ remotes:
|
|||||||
mutable_ttl: 300
|
mutable_ttl: 300
|
||||||
|
|
||||||
pypi:
|
pypi:
|
||||||
base_url: "https://pypi.org"
|
base_url: "https://files.pythonhosted.org"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "pypi"
|
package: "pypi"
|
||||||
description: "Python Package Index — simple repository API"
|
description: "Python Package Index — simple index and package files via a single remote"
|
||||||
# pypi_files_url: the upstream host used in simple-index hrefs (default: files.pythonhosted.org)
|
# simple/ requests are transparently fetched from pypi.org; package files come from
|
||||||
# pypi_files_remote: our proxy remote that will serve those files (default: pypi-files)
|
# files.pythonhosted.org (base_url). URLs in the simple index are rewritten to this remote.
|
||||||
pypi_files_url: "https://files.pythonhosted.org"
|
|
||||||
pypi_files_remote: "pypi-files"
|
|
||||||
check_mutable_updates: true
|
check_mutable_updates: true
|
||||||
|
# Block packages published within the last 3 days (supply-chain attack mitigation).
|
||||||
|
# Immutable artifacts (wheel/sdist) newer than quarantine_days return 404 until
|
||||||
|
# the window passes. Disable by setting quarantine_new: false or removing both keys.
|
||||||
|
quarantine_new: true
|
||||||
|
quarantine_days: 3
|
||||||
|
immutable_patterns:
|
||||||
|
- "packages/.*\\.whl$"
|
||||||
|
- "packages/.*\\.whl\\.metadata$"
|
||||||
|
- "packages/.*\\.tar\\.gz$"
|
||||||
|
- "packages/.*\\.zip$"
|
||||||
|
- "packages/.*\\.egg$"
|
||||||
cache:
|
cache:
|
||||||
immutable_ttl: 0
|
immutable_ttl: 0
|
||||||
mutable_ttl: 600 # Simple index pages refreshed after 10 minutes
|
mutable_ttl: 600 # Simple index pages refreshed after 10 minutes
|
||||||
@@ -212,12 +228,9 @@ remotes:
|
|||||||
base_url: "https://gitea.example.com/api/packages/myorg/pypi"
|
base_url: "https://gitea.example.com/api/packages/myorg/pypi"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "pypi"
|
package: "pypi"
|
||||||
description: "Private Gitea PyPI registry"
|
description: "Private Gitea PyPI registry — simple index and files at the same host"
|
||||||
# username: "your-gitea-username"
|
# username: "your-gitea-username"
|
||||||
# password: "your-personal-access-token" # needs package:read scope
|
# password: "your-personal-access-token" # needs package:read scope
|
||||||
# Files are served from the same Gitea instance — rewrite back to this same remote
|
|
||||||
pypi_files_url: "https://gitea.example.com/api/packages/myorg/pypi"
|
|
||||||
pypi_files_remote: "pypi-gitea"
|
|
||||||
check_mutable_updates: true
|
check_mutable_updates: true
|
||||||
immutable_patterns:
|
immutable_patterns:
|
||||||
- "files/.*\\.whl$"
|
- "files/.*\\.whl$"
|
||||||
@@ -229,29 +242,11 @@ remotes:
|
|||||||
immutable_ttl: 0
|
immutable_ttl: 0
|
||||||
mutable_ttl: 600
|
mutable_ttl: 600
|
||||||
|
|
||||||
pypi-files:
|
|
||||||
base_url: "https://files.pythonhosted.org"
|
|
||||||
type: "remote"
|
|
||||||
package: "generic"
|
|
||||||
description: "Python Package Index — file storage (wheels, sdists)"
|
|
||||||
immutable_patterns:
|
|
||||||
- "packages/.*\\.whl$"
|
|
||||||
- "packages/.*\\.whl\\.metadata$"
|
|
||||||
- "packages/.*\\.tar\\.gz$"
|
|
||||||
- "packages/.*\\.zip$"
|
|
||||||
- "packages/.*\\.egg$"
|
|
||||||
cache:
|
|
||||||
immutable_ttl: 0 # Package files are content-addressed — cache forever
|
|
||||||
|
|
||||||
npm:
|
npm:
|
||||||
base_url: "https://registry.npmjs.org"
|
base_url: "https://registry.npmjs.org"
|
||||||
type: "remote"
|
type: "remote"
|
||||||
package: "npm"
|
package: "npm"
|
||||||
description: "npm registry — package metadata with tarball URL rewriting"
|
description: "npm registry — package metadata with tarball URL rewriting"
|
||||||
# npm_files_url: the upstream host used in metadata tarball hrefs (default: https://registry.npmjs.org)
|
|
||||||
# npm_files_remote: our proxy remote that will serve those tarballs (default: npm-files)
|
|
||||||
npm_files_url: "https://registry.npmjs.org"
|
|
||||||
npm_files_remote: "npm"
|
|
||||||
check_mutable_updates: true
|
check_mutable_updates: true
|
||||||
immutable_patterns:
|
immutable_patterns:
|
||||||
- \.tgz$
|
- \.tgz$
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from . import discovery, docker, flush, local, proxy
|
||||||
|
|
||||||
|
__all__ = ["discovery", "docker", "flush", "local", "proxy"]
|
||||||
@@ -0,0 +1,82 @@
|
|||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from fastapi import HTTPException
|
||||||
|
|
||||||
|
from .proxy import cache_single_artifact
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def _discover_github_releases(remote: str, include_pattern: str) -> list[str]:
    """Return download URLs of GitHub release assets matching *include_pattern*.

    Args:
        remote: Remote identifier containing ``github.com/<owner>/<repo>``.
            A leading scheme (``https://github.com/...``) is accepted.
        include_pattern: Pattern in which every ``*`` is expanded to ``.*``
            before it is compiled as a regular expression and searched
            against each asset's ``browser_download_url``.

    Returns:
        The ``browser_download_url`` of every matching asset, in the order
        the releases and their assets are returned by the API.

    Raises:
        HTTPException: 400 when no owner/repo pair can be extracted from
            *remote*; the upstream status code when the releases request
            does not answer with 200.
    """
    # ``search`` rather than ``match``: _discover sends every remote that
    # merely *contains* "github.com" here, so a full URL must parse too.
    # The (?:^|/) guard keeps look-alike hosts such as "notgithub.com" out.
    match = re.search(r"(?:^|/)github\.com/([^/]+)/([^/]+)", remote)
    if not match:
        raise HTTPException(status_code=400, detail="Invalid GitHub remote format")

    owner, repo = match.groups()

    async with httpx.AsyncClient(follow_redirects=True) as client:
        response = await client.get(f"https://api.github.com/repos/{owner}/{repo}/releases")
        if response.status_code != 200:
            raise HTTPException(status_code=response.status_code, detail=f"Failed to fetch releases: {response.text}")

        releases = response.json()
        regex = re.compile(include_pattern.replace("*", ".*"))
        return [
            asset["browser_download_url"]
            for release in releases
            for asset in release.get("assets", [])
            if regex.search(asset["browser_download_url"])
        ]
|
||||||
|
|
||||||
|
|
||||||
|
async def _discover(remote: str, include_pattern: str) -> list[str]:
    """Dispatch artifact discovery to the backend that handles *remote*.

    Only remotes containing ``github.com`` are supported; anything else is
    rejected with HTTP 400.
    """
    if "github.com" not in remote:
        raise HTTPException(status_code=400, detail=f"Unsupported remote: {remote}")
    return await _discover_github_releases(remote, include_pattern)
|
||||||
|
|
||||||
|
|
||||||
|
async def cache_artifacts(remote: str, include_pattern: str, storage) -> dict[str, Any]:
    """Discover artifacts of *remote* matching *include_pattern* and cache each one.

    Returns:
        A dict with ``message``, ``cached_count`` (results whose status is
        ``cached`` or ``already_cached``) and ``artifacts`` (one result dict
        per discovered URL, as returned by ``cache_single_artifact``).

    Raises:
        HTTPException: client/upstream errors raised during discovery are
            passed through unchanged; any other failure becomes a 500.
    """
    try:
        matching_urls = await _discover(remote, include_pattern)

        if not matching_urls:
            return {"message": "No matching artifacts found", "cached_count": 0, "artifacts": []}

        cached_artifacts = []
        for url in matching_urls:
            # Key each artifact by remote + URL path, the same derivation
            # list_artifacts uses for its lookup. Passing empty strings here
            # made every artifact share a single storage key.
            result = await cache_single_artifact(url, remote, urlparse(url).path, storage, {})
            cached_artifacts.append(result)

        cached_count = sum(1 for a in cached_artifacts if a["status"] in ["cached", "already_cached"])
        return {
            "message": f"Processed {len(matching_urls)} artifacts, {cached_count} successfully cached",
            "cached_count": cached_count,
            "artifacts": cached_artifacts,
        }
    except HTTPException:
        # Keep the original status (e.g. 400 for an unsupported remote)
        # instead of flattening it into a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||||
|
|
||||||
|
|
||||||
|
async def list_artifacts(remote: str, include_pattern: str, storage) -> dict[str, Any]:
    """Report which discovered artifacts of *remote* are already in storage.

    Each discovered URL is mapped to a storage key built from *remote* and
    the URL's path; only URLs whose key exists are listed.

    Returns:
        A dict with ``remote``, ``pattern``, ``total_found`` (all discovered
        URLs), ``cached_count`` and ``artifacts`` (``url``, ``cached_url``
        and ``key`` for every cached entry).

    Raises:
        HTTPException: client/upstream errors raised during discovery are
            passed through unchanged; any other failure becomes a 500.
    """
    try:
        matching_urls = await _discover(remote, include_pattern)
        cached_artifacts = []
        for url in matching_urls:
            parsed = urlparse(url)
            key = storage.get_object_key(remote, parsed.path)
            if storage.exists(key):
                cached_artifacts.append({"url": url, "cached_url": storage.get_url(key), "key": key})

        return {
            "remote": remote,
            "pattern": include_pattern,
            "total_found": len(matching_urls),
            "cached_count": len(cached_artifacts),
            "artifacts": cached_artifacts,
        }
    except HTTPException:
        # Keep the original status (e.g. 400 for an unsupported remote)
        # instead of flattening it into a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||||
@@ -0,0 +1,103 @@
|
|||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
from fastapi import HTTPException, Request, Response
|
||||||
|
|
||||||
|
from . import proxy as _proxy
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def ping() -> Response:
    """Answer the registry version check with an empty JSON object.

    The response carries the ``Docker-Distribution-Api-Version`` header set
    to ``registry/2.0``.
    """
    version_header = {"Docker-Distribution-Api-Version": "registry/2.0"}
    return Response(content="{}", media_type="application/json", headers=version_header)
|
||||||
|
|
||||||
|
|
||||||
|
async def proxy(request: Request, remote_name: str, path: str, storage, cache, config, metrics) -> Response:
    """Serve a registry object for a docker remote, caching it on first use.

    Steps, in order:
      1. Validate that *remote_name* is configured and is a docker remote.
      2. When patterns are configured, require the path or its first two
         segments (the image name) to match one of them, else 403.
      3. Reuse the stored copy if present; an expired mutable entry is
         re-validated through ``handle_expired_mutable``.
      4. On a miss, fetch and store the object, record TTL/metadata for
         mutable paths, and apply the quarantine check to immutable ones.
      5. Return the stored bytes with digest and content-type headers
         (headers only for HEAD requests).

    Raises:
        HTTPException: 404 unknown remote, 400 non-docker remote, 403 path
            blocked by patterns, 502 upstream fetch failure, plus whatever
            the quarantine check raises.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
    if remote_config.get("package") != "docker":
        raise HTTPException(status_code=400, detail=f"Remote '{remote_name}' is not a docker remote")

    allow_list = config.get_immutable_patterns(remote_name, "")
    if allow_list:
        segments = path.split("/")
        image_name = "/".join(segments[:2]) if len(segments) >= 2 else path
        permitted = False
        for expr in allow_list:
            if re.search(expr, path) or re.search(expr, image_name):
                permitted = True
                break
        if not permitted:
            logger.info(f"PATTERN BLOCKED: {remote_name}/{path}")
            raise HTTPException(status_code=403, detail="Image not allowed by configuration patterns")

    upstream_root = remote_config.get("base_url", "").rstrip("/")
    remote_url = f"{upstream_root}/v2/{path}"

    candidate_key = storage.get_object_key(remote_name, path)
    cached_key = candidate_key if storage.exists(candidate_key) else None

    is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name))

    if cached_key and is_mutable and not cache.is_index_valid(remote_name, path):
        still_valid = await _proxy.handle_expired_mutable(remote_name, path, remote_url, config, cache, storage)
        if not still_valid:
            cached_key = None

    if not cached_key:
        logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
        result = await _proxy.cache_single_artifact(remote_url, remote_name, path, storage, remote_config)
        status = result["status"]
        if status == "error":
            raise HTTPException(status_code=502, detail=f"Failed to fetch: {result['error']}")
        if status == "cached" and is_mutable:
            mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
            cache.mark_index_cached(remote_name, path, mutable_ttl)
            logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
            if result.get("etag") or result.get("last_modified"):
                cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
        if not is_mutable:
            published = result.get("last_modified")
            if published:
                cache.store_artifact_published(remote_name, path, published)
            _proxy._check_quarantine(remote_name, published, config)
    elif not is_mutable:
        # Cached immutable object: look up (or lazily fetch) its publish
        # date so the quarantine window can still be enforced.
        published = cache.get_artifact_published(remote_name, path)
        if not published:
            published = await _proxy._fetch_last_modified(remote_url, remote_config)
            if published:
                cache.store_artifact_published(remote_name, path, published)
        _proxy._check_quarantine(remote_name, published, config)

    artifact_data = storage.download_object(storage.get_object_key(remote_name, path))
    content_type = _docker_content_type(path, artifact_data)

    headers = {
        "Docker-Distribution-Api-Version": "registry/2.0",
        "Docker-Content-Digest": f"sha256:{hashlib.sha256(artifact_data).hexdigest()}",
        "Content-Length": str(len(artifact_data)),
    }

    if request.method == "HEAD":
        return Response(status_code=200, headers=headers, media_type=content_type)

    metrics.record_cache_hit(remote_name, len(artifact_data))
    return Response(content=artifact_data, media_type=content_type, headers=headers)


def _docker_content_type(path: str, payload: bytes) -> str:
    """Choose the media type for a stored registry object.

    Blob paths are served as octet streams. Manifests use their own
    ``mediaType`` field when present, fall back to the OCI index type when
    they carry a ``manifests`` key, and to the OCI manifest type otherwise
    (including when the payload cannot be parsed).
    """
    if "/blobs/" in path:
        return "application/octet-stream"

    fallback = "application/vnd.oci.image.manifest.v1+json"
    try:
        document = json.loads(payload)
        declared = document.get("mediaType")
        if declared:
            return declared
        if "manifests" in document:
            return "application/vnd.oci.image.index.v1+json"
        return fallback
    except Exception:
        return fallback
|
||||||
@@ -0,0 +1,66 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
from fastapi import HTTPException
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def handle(remote: str | None, cache_type: str, cache, storage) -> dict:
    """Flush cached state, optionally limited to one remote.

    Args:
        remote: Remote name to restrict the flush to, or ``None`` for all.
        cache_type: ``"index"`` (index and mutable-meta keys), ``"metrics"``
            (metrics keys), ``"files"`` (stored objects) or ``"all"``.
        cache: Key-value cache wrapper; its ``client`` is used only when
            ``available`` is truthy.
        storage: Object storage wrapper exposing ``bucket`` and ``client``.

    Returns:
        A dict echoing ``remote`` and ``cache_type`` plus a ``flushed``
        summary: ``redis_keys``, ``s3_objects`` (objects actually deleted)
        and human-readable ``operations``.

    Raises:
        HTTPException: 500 on any failure outside the object-storage step
            (that step records its own failure in ``operations``).
    """
    try:
        result = {"remote": remote, "cache_type": cache_type, "flushed": {"redis_keys": 0, "s3_objects": 0, "operations": []}}
        flushed = result["flushed"]

        if cache_type in ["all", "index", "metrics"] and cache.available and cache.client:
            patterns = []

            if cache_type in ["all", "index"]:
                if remote:
                    patterns += [f"index:{remote}:*", f"mutable:meta:{remote}:*"]
                else:
                    patterns += ["index:*", "mutable:meta:*"]

            if cache_type in ["all", "metrics"]:
                patterns.append(f"metrics:*:{remote}" if remote else "metrics:*")

            for pattern in patterns:
                keys = cache.client.keys(pattern)
                if keys:
                    cache.client.delete(*keys)
                    flushed["redis_keys"] += len(keys)
                    logger.info(f"Cache flush: deleted {len(keys)} Redis keys matching '{pattern}'")

            if flushed["redis_keys"] > 0:
                flushed["operations"].append(f"Deleted {flushed['redis_keys']} Redis keys")

        if cache_type in ["all", "files"]:
            try:
                list_params = {"Bucket": storage.bucket}
                if remote:
                    list_params["Prefix"] = f"{remote}/"

                found = 0
                deleted = 0
                # A single listing call returns one page only; keep following
                # the continuation token so large buckets are fully flushed.
                while True:
                    response = storage.client.list_objects_v2(**list_params)
                    for obj in response.get("Contents", []):
                        key = obj["Key"]
                        found += 1
                        try:
                            storage.client.delete_object(Bucket=storage.bucket, Key=key)
                            deleted += 1
                            flushed["s3_objects"] += 1
                        except Exception as e:
                            logger.warning(f"Failed to delete S3 object {key}: {e}")

                    next_token = response.get("NextContinuationToken")
                    if not response.get("IsTruncated") or not next_token:
                        break
                    list_params["ContinuationToken"] = next_token

                if found:
                    scope = f" for remote '{remote}'" if remote else ""
                    # Report what was really removed, not what was listed.
                    flushed["operations"].append(f"Deleted {deleted} S3 objects{scope}")
                    logger.info(f"Cache flush: deleted {deleted} S3 objects{scope}")

            except Exception as e:
                flushed["operations"].append(f"S3 flush failed: {str(e)}")
                logger.error(f"Cache flush S3 error: {e}")

        if not flushed["operations"]:
            flushed["operations"].append("No cache entries found to flush")

        return result

    except Exception as e:
        logger.error(f"Cache flush error: {e}")
        raise HTTPException(status_code=500, detail=f"Cache flush failed: {str(e)}") from e
|
||||||
@@ -0,0 +1,108 @@
|
|||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from fastapi import HTTPException, Response, UploadFile
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def upload(remote_name: str, path: str, file: UploadFile, storage, database, config) -> JSONResponse:
    """Store an uploaded file in a local repository and record its metadata.

    The object is written under ``local/<remote_name>/<path>`` and then
    registered in the database together with its size, SHA-256 and content
    type. If registration fails, the stored object is removed again.

    Raises:
        HTTPException: 404 unknown remote, 400 remote is not ``local``,
            409 file already registered, 500 storage or metadata failure.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
    if remote_config.get("type") != "local":
        raise HTTPException(status_code=400, detail="Upload only supported for local repositories")

    try:
        # Reject duplicates before reading and hashing the whole body.
        if database.file_exists(remote_name, path):
            raise HTTPException(status_code=409, detail="File already exists")

        content = await file.read()
        sha256_sum = hashlib.sha256(content).hexdigest()

        s3_key = f"local/{remote_name}/{path}"
        content_type = file.content_type or "application/octet-stream"

        try:
            storage.upload(s3_key, content)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Upload failed: {e}") from e

        try:
            success = database.add_local_file(
                repository_name=remote_name,
                file_path=path,
                s3_key=s3_key,
                size_bytes=len(content),
                sha256_sum=sha256_sum,
                content_type=content_type,
            )
        except Exception:
            # The object is already stored; do not leave it orphaned when
            # the metadata write raises instead of returning False.
            try:
                storage.delete_object(s3_key)
            except Exception as cleanup_error:
                logger.warning(f"Failed to delete S3 object {s3_key} after metadata error: {cleanup_error}")
            raise

        if not success:
            storage.delete_object(s3_key)
            raise HTTPException(status_code=500, detail="Failed to save file metadata")

        return JSONResponse(
            {
                "message": "File uploaded successfully",
                "file_path": path,
                "size_bytes": len(content),
                "sha256_sum": sha256_sum,
                "content_type": content_type,
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}") from e
|
||||||
|
|
||||||
|
|
||||||
|
def check_exists(remote_name: str, path: str, database, config) -> Response:
    """Answer an existence check for a file in a local repository.

    On success the response has no body and exposes the stored metadata as
    headers: size, content type, SHA-256 and the creation/upload timestamps
    (ISO formatted, empty string when unset).

    Raises:
        HTTPException: 404 unknown remote or unknown file, 405 remote is not
            ``local``, 500 on any other failure.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")

    if remote_config.get("type") != "local":
        raise HTTPException(status_code=405, detail="HEAD method only supported for local repositories")

    def _iso_or_blank(stamp) -> str:
        # Timestamps may be unset; headers need a string either way.
        return stamp.isoformat() if stamp else ""

    try:
        metadata = database.get_local_file_metadata(remote_name, path)
        if not metadata:
            raise HTTPException(status_code=404, detail="File not found")

        response_headers = {
            "Content-Length": str(metadata["size_bytes"]),
            "Content-Type": metadata.get("content_type", "application/octet-stream"),
            "X-SHA256": metadata["sha256_sum"],
            "X-Created-At": _iso_or_blank(metadata["created_at"]),
            "X-Uploaded-At": _iso_or_blank(metadata["uploaded_at"]),
        }
        return Response(headers=response_headers)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
def delete(remote_name: str, path: str, storage, database, config) -> JSONResponse:
    """Remove a file from a local repository.

    The database record is deleted first; the storage key it returns is then
    removed from object storage. A failed storage removal is only logged —
    the request still succeeds.

    Raises:
        HTTPException: 404 unknown remote or unknown file, 400 remote is not
            ``local``, 500 on any other failure.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
    if remote_config.get("type") != "local":
        raise HTTPException(status_code=400, detail="Delete only supported for local repositories")

    try:
        s3_key = database.delete_local_file(remote_name, path)
        if not s3_key:
            raise HTTPException(status_code=404, detail="File not found")

        object_removed = storage.delete_object(s3_key)
        if not object_removed:
            logger.warning(f"Failed to delete S3 object {s3_key} after database removal")

        return JSONResponse({"message": "File deleted successfully"})
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Delete failed: {str(e)}")
|
||||||
@@ -0,0 +1,331 @@
|
|||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from datetime import UTC, datetime, timedelta
|
||||||
|
from email.utils import parsedate_to_datetime
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from fastapi import HTTPException, Request, Response
|
||||||
|
|
||||||
|
from ..auth import get_docker_token_for_response
|
||||||
|
from ..remote import helm as _helm
|
||||||
|
from ..remote import npm as _npm
|
||||||
|
from ..remote import python as _pypi
|
||||||
|
from ..remote.base import get_content_type
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class UpstreamUnreachable(Exception):
    """Signal that the upstream backend could not be contacted.

    Raised for network and timeout errors, as opposed to an upstream that
    answered with an error status.
    """
|
||||||
|
|
||||||
|
|
||||||
|
def _check_quarantine(remote_name: str, last_modified_str: str | None, config) -> None:
|
||||||
|
"""Raise HTTP 404 if the artifact is within the per-remote quarantine window.
|
||||||
|
|
||||||
|
Fails open (allows the request) when the publish date cannot be determined.
|
||||||
|
"""
|
||||||
|
enabled, days = config.get_quarantine_config(remote_name)
|
||||||
|
if not enabled or not days:
|
||||||
|
return
|
||||||
|
if not last_modified_str:
|
||||||
|
return # cannot determine age → allow
|
||||||
|
try:
|
||||||
|
publish_date = parsedate_to_datetime(last_modified_str)
|
||||||
|
except Exception:
|
||||||
|
return # unparseable → allow
|
||||||
|
cutoff = datetime.now(UTC) - timedelta(days=days)
|
||||||
|
if publish_date > cutoff:
|
||||||
|
available_on = (publish_date + timedelta(days=days)).date()
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail=(
|
||||||
|
f"Package quarantined: published {publish_date.date()}, available after {available_on} ({days}-day new-release quarantine)"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_last_modified(remote_url: str, remote_cfg: dict) -> str | None:
    """Issue a HEAD request upstream and return its ``Last-Modified`` header.

    Basic-auth credentials from *remote_cfg* are sent when configured.
    Returns ``None`` when the header is absent or the request fails for any
    reason.
    """
    request_headers = _basic_auth_header(remote_cfg)
    try:
        async with httpx.AsyncClient(follow_redirects=True) as client:
            head_response = await client.head(remote_url, headers=request_headers, timeout=10.0)
            last_modified = head_response.headers.get("Last-Modified")
            return last_modified
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _basic_auth_header(remote_cfg: dict) -> dict[str, str]:
    """Build an HTTP Basic ``Authorization`` header from remote credentials.

    Returns an empty dict unless both ``username`` and ``password`` are set
    to non-empty values in *remote_cfg*.
    """
    user = remote_cfg.get("username")
    secret = remote_cfg.get("password")
    if not (user and secret):
        return {}
    encoded = base64.b64encode(f"{user}:{secret}".encode()).decode()
    return {"Authorization": f"Basic {encoded}"}
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_content(
|
||||||
|
data: bytes,
|
||||||
|
path: str,
|
||||||
|
filename: str,
|
||||||
|
remote_config: dict,
|
||||||
|
request: Request,
|
||||||
|
remote_name: str = "",
|
||||||
|
) -> tuple[bytes, str]:
|
||||||
|
package = remote_config.get("package")
|
||||||
|
proxy_base = str(request.base_url).rstrip("/")
|
||||||
|
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||||
|
|
||||||
|
if package == "pypi":
|
||||||
|
return _pypi.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name)
|
||||||
|
if package == "npm":
|
||||||
|
return _npm.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name)
|
||||||
|
if package == "helm":
|
||||||
|
return _helm.resolve_content(data, path, filename, base_url, proxy_base, remote_name)
|
||||||
|
return data, get_content_type(filename)
|
||||||
|
|
||||||
|
|
||||||
|
def construct_url(remote_config: dict, path: str) -> str:
|
||||||
|
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||||
|
if remote_config.get("package") == "docker":
|
||||||
|
return f"{base_url}/v2/{path}"
|
||||||
|
if remote_config.get("package") == "pypi":
|
||||||
|
return _pypi.construct_url(base_url, path)
|
||||||
|
return f"{base_url}/{path}"
|
||||||
|
|
||||||
|
|
||||||
|
async def cache_single_artifact(url: str, remote_name: str, path: str, storage, remote_config: dict) -> dict:
    """Download *url* into storage unless an object with its key already exists.

    Docker requests (``package == "docker"`` or ``/v2/`` in the URL) send
    manifest/blob ``Accept`` headers and, on a 401, retry once with a bearer
    token obtained from the ``WWW-Authenticate`` challenge. Other requests
    use HTTP Basic auth when both username and password are configured.

    Returns:
        A result dict whose ``status`` is ``already_cached``, ``cached``
        (with size, storage path, ``etag`` and ``last_modified``) or
        ``error`` (with the error text). Failures are reported through the
        result dict rather than raised.
    """
    key = storage.get_object_key(remote_name, path)

    if storage.exists(key):
        logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})")
        return {"url": url, "cached_url": storage.get_url(key), "status": "already_cached"}

    manifest_accept = ",".join(
        (
            "application/vnd.docker.distribution.manifest.v2+json",
            "application/vnd.oci.image.manifest.v1+json",
            "application/vnd.oci.image.index.v1+json",
            "application/vnd.docker.distribution.manifest.list.v2+json",
        )
    )

    try:
        is_docker = remote_config.get("package") == "docker" or "/v2/" in url
        username = remote_config.get("username")
        password = remote_config.get("password")

        headers = {}
        if not is_docker:
            if username and password:
                credentials = base64.b64encode(f"{username}:{password}".encode()).decode()
                headers["Authorization"] = "Basic " + credentials
        elif "/manifests/" in url:
            headers["Accept"] = manifest_accept
        elif "/blobs/" in url:
            headers["Accept"] = "application/octet-stream"

        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.get(url, headers=headers)

            if is_docker and response.status_code == 401:
                challenge = response.headers.get("WWW-Authenticate", "")
                token = await get_docker_token_for_response(challenge, username, password)
                if token:
                    headers["Authorization"] = f"Bearer {token}"
                    response = await client.get(url, headers=headers)

            response.raise_for_status()
            payload = response.content
            storage.upload(key, payload)
            logger.info(f"Cache ADD SUCCESS: {url} (size: {len(payload)} bytes, key: {key})")

            return {
                "url": url,
                "cached_url": storage.get_url(key),
                "storage_path": f"s3://{storage.bucket}/{key}",
                "size": len(payload),
                "status": "cached",
                "etag": response.headers.get("ETag"),
                "last_modified": response.headers.get("Last-Modified"),
            }

    except Exception as e:
        return {"url": url, "status": "error", "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _upstream_reachable(url: str, auth_headers: dict | None = None) -> bool:
    """Probe ``url`` with a HEAD request and report whether the upstream answered.

    Only transport-level failures (``httpx.NetworkError`` and
    ``httpx.TimeoutException``) count as "unreachable". Any HTTP response,
    whatever its status code, and any other exception are reported as reachable.

    Args:
        url: Upstream URL to probe.
        auth_headers: Optional headers to send with the probe.

    Returns:
        False if the request failed with a network error or timed out
        (10 second timeout), True otherwise.
    """
    probe_headers = auth_headers or {}
    try:
        async with httpx.AsyncClient(follow_redirects=True) as session:
            await session.head(url, headers=probe_headers, timeout=10.0)
    except (httpx.NetworkError, httpx.TimeoutException):
        return False
    except Exception:
        # Anything other than a connectivity failure is not treated as an outage.
        return True
    return True
|
||||||
|
|
||||||
|
|
||||||
|
async def check_upstream_changed(remote_url: str, remote_name: str, path: str, cache, auth_headers: dict | None = None) -> bool:
    """Ask the upstream, via a conditional HEAD request, whether a cached file changed.

    The ETag / Last-Modified values previously stored through ``cache`` are sent
    as ``If-None-Match`` / ``If-Modified-Since``. The file counts as unchanged
    only when the upstream replies ``304``.

    Args:
        remote_url: Full upstream URL of the file.
        remote_name: Name of the remote the file belongs to.
        path: Path of the file within the remote.
        cache: Object providing ``get_mutable_meta(remote_name, path)``.
        auth_headers: Optional headers added to the request.

    Returns:
        True when no validators are stored or the response status is not 304;
        False when the upstream answered 304.

    Raises:
        UpstreamUnreachable: On ``httpx.NetworkError`` or ``httpx.TimeoutException``.
    """
    validators = cache.get_mutable_meta(remote_name, path)
    if not validators:
        return True

    conditional = dict(auth_headers or {})
    etag = validators.get("etag")
    last_modified = validators.get("last_modified")
    if etag:
        conditional["If-None-Match"] = etag
    if last_modified:
        conditional["If-Modified-Since"] = last_modified
    if not etag and not last_modified:
        # Nothing to validate against, so assume the file changed.
        return True

    try:
        async with httpx.AsyncClient(follow_redirects=True) as session:
            reply = await session.head(remote_url, headers=conditional)
    except (httpx.NetworkError, httpx.TimeoutException) as exc:
        raise UpstreamUnreachable(str(exc)) from exc
    return reply.status_code != 304
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_expired_mutable(remote_name: str, path: str, remote_url: str, config, cache, storage) -> bool:
    """Decide what to do with a cached mutable file whose TTL has run out.

    If the remote sets ``check_mutable_updates`` and ``path`` matches one of
    its user-mutable patterns, the upstream is revalidated with a conditional
    request. Otherwise the upstream is only probed for reachability. In both
    modes an unreachable upstream keeps the stale copy and re-arms its TTL.

    Returns:
        True if the cached copy should still be served (TTL was re-armed);
        False if it was removed through ``cache.cleanup_expired_index`` and
        must be fetched again.
    """
    mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
    remote_cfg = config.get_remote_config(remote_name) or {}
    auth = _basic_auth_header(remote_cfg)

    def keep_stale_copy() -> bool:
        # Upstream is down: serve what we have and retry after another TTL.
        cache.mark_index_cached(remote_name, path, mutable_ttl)
        logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)")
        return True

    wants_revalidation = remote_cfg.get("check_mutable_updates", False)
    if wants_revalidation and cache.is_mutable_file(path, config.get_user_mutable_patterns(remote_name)):
        try:
            changed = await check_upstream_changed(remote_url, remote_name, path, cache, auth)
        except UpstreamUnreachable:
            return keep_stale_copy()
        if not changed:
            cache.mark_index_cached(remote_name, path, mutable_ttl)
            logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)")
            return True
        logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")
    elif await _upstream_reachable(remote_url, auth):
        logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")
    else:
        return keep_stale_copy()

    cache.cleanup_expired_index(storage, remote_name, path)
    return False
|
||||||
|
|
||||||
|
|
||||||
|
async def handle(request: Request, remote_name: str, path: str, storage, cache, config, database, metrics) -> Response:
    """Serve ``path`` from ``remote_name``, from the cache when possible.

    Flow:
      1. Unknown remote -> 404.
      2. ``type: local`` remotes are served straight from storage using the
         metadata recorded in ``database``.
      3. Paths that match no mutable pattern must match one of the remote's
         immutable patterns (when any are configured), otherwise -> 403.
      4. A cached mutable file whose TTL expired is revalidated through
         ``handle_expired_mutable``; a cached immutable file is passed to
         ``_check_quarantine`` before being served.
      5. On a cache miss the artifact is fetched with ``cache_single_artifact``,
         its TTL / validators / publish date are recorded, and it is served.

    Raises:
        HTTPException: 404 (unknown remote or local file), 403 (blocked by
            patterns), 500 (missing ``base_url`` or storage/serving failure),
            502 (upstream fetch failed), plus anything raised by
            ``_check_quarantine`` or ``_resolve_content``.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")

    if remote_config.get("type") == "local":
        metadata = database.get_local_file_metadata(remote_name, path)
        if not metadata:
            raise HTTPException(status_code=404, detail="File not found")
        content = storage.download_object(metadata["s3_key"])
        if content is None:
            raise HTTPException(status_code=500, detail="File not accessible")
        return Response(
            content=content,
            media_type=metadata.get("content_type", "application/octet-stream"),
            headers={"Content-Disposition": f"attachment; filename={os.path.basename(path)}"},
        )

    # The first two path segments identify the repository; the rest is the file.
    path_parts = path.split("/")
    if len(path_parts) >= 2:
        repo_path = f"{path_parts[0]}/{path_parts[1]}"
        file_path = "/".join(path_parts[2:])
    else:
        repo_path = path
        file_path = path

    mutable_patterns = config.get_mutable_patterns(remote_name)
    if not cache.is_mutable_file(file_path, mutable_patterns) and not cache.is_mutable_file(path, mutable_patterns):
        patterns = config.get_immutable_patterns(remote_name, repo_path)
        if patterns and not any(re.search(p, file_path) or re.search(p, path) for p in patterns):
            logger.info(f"PATTERN BLOCKED: {remote_name}/{path} - not matching include patterns")
            raise HTTPException(status_code=403, detail="Artifact not allowed by configuration patterns")

    # Validate the configuration before building the URL, so a remote without
    # base_url produces this explicit error rather than whatever construct_url
    # does with a missing value.
    if not remote_config.get("base_url"):
        raise HTTPException(status_code=500, detail=f"No base_url configured for remote '{remote_name}'")
    remote_url = construct_url(remote_config, path)

    cached_key = storage.get_object_key(remote_name, path)
    if not storage.exists(cached_key):
        cached_key = None

    filename = os.path.basename(path)
    is_mutable = cache.is_mutable_file(path, mutable_patterns)

    if cached_key and is_mutable and not cache.is_index_valid(remote_name, path):
        if not await handle_expired_mutable(remote_name, path, remote_url, config, cache, storage):
            cached_key = None

    if cached_key:
        if not is_mutable:
            published = cache.get_artifact_published(remote_name, path)
            if not published:
                # Publish date not recorded yet: ask the upstream and remember it.
                published = await _fetch_last_modified(remote_url, remote_config)
                if published:
                    cache.store_artifact_published(remote_name, path, published)
            _check_quarantine(remote_name, published, config)

        try:
            artifact_data = storage.download_object(cached_key)
            artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
            logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})")
            metrics.record_cache_hit(remote_name, len(artifact_data))
            database.record_artifact_mapping(cached_key, remote_name, path, len(artifact_data))
            return Response(
                content=artifact_data,
                media_type=content_type,
                headers={
                    "Content-Disposition": f"attachment; filename={filename}",
                    "X-Artifact-Source": "cache",
                    "X-Artifact-Size": str(len(artifact_data)),
                },
            )
        except HTTPException:
            raise
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error retrieving cached artifact: {str(e)}") from e

    logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
    result = await cache_single_artifact(remote_url, remote_name, path, storage, remote_config)

    if result["status"] == "error":
        logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}")
        raise HTTPException(status_code=502, detail=f"Failed to fetch artifact: {result['error']}")

    if result["status"] == "cached" and is_mutable:
        cache_config = config.get_cache_config(remote_name)
        mutable_ttl = cache_config.get("mutable_ttl", 3600)
        cache.mark_index_cached(remote_name, path, mutable_ttl)
        logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
        if result.get("etag") or result.get("last_modified"):
            # Keep the validators for later conditional revalidation.
            cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))

    if not is_mutable:
        published = result.get("last_modified")
        if published:
            cache.store_artifact_published(remote_name, path, published)
        _check_quarantine(remote_name, published, config)

    try:
        cache_key = storage.get_object_key(remote_name, path)
        artifact_data = storage.download_object(cache_key)
        artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
        metrics.record_cache_miss(remote_name, len(artifact_data))
        database.record_artifact_mapping(cache_key, remote_name, path, len(artifact_data))
        return Response(
            content=artifact_data,
            media_type=content_type,
            headers={
                "Content-Disposition": f"attachment; filename={filename}",
                "X-Artifact-Source": "remote",
                "X-Artifact-Size": str(len(artifact_data)),
            },
        )
    except HTTPException:
        # Same as the cache-hit path: deliberate HTTP errors must not be
        # rewritten into a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error serving artifact: {str(e)}") from e
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .docker import fetch_token, get_docker_token_for_response, parse_www_authenticate
|
||||||
|
|
||||||
|
__all__ = ["fetch_token", "get_docker_token_for_response", "parse_www_authenticate"]
|
||||||
@@ -0,0 +1,96 @@
|
|||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# In-memory token cache: key -> (token, expires_at)
|
||||||
|
_token_cache: dict[str, tuple[str, float]] = {}
|
||||||
|
|
||||||
|
_WWW_AUTH_RE = re.compile(
|
||||||
|
r'Bearer\s+realm="(?P<realm>[^"]+)"'
|
||||||
|
r'(?:,service="(?P<service>[^"]*)")?'
|
||||||
|
r'(?:,scope="(?P<scope>[^"]*)")?',
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_key(realm: str, service: str, scope: str, username: str | None) -> str:
|
||||||
|
return f"{realm}|{service}|{scope}|{username or ''}"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cached_token(key: str) -> str | None:
    """Return the cached token stored under ``key`` if it has not expired.

    A missing or expired entry is removed from ``_token_cache`` and ``None``
    is returned.
    """
    entry = _token_cache.get(key)
    if not entry or entry[1] <= time.time():
        # Absent or past its expiry timestamp: make sure it is gone.
        _token_cache.pop(key, None)
        return None
    return entry[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _store_token(key: str, token: str, expires_in: int) -> None:
    """Cache ``token`` under ``key`` with an expiry derived from ``expires_in`` seconds.

    The entry expires 30 seconds early so a token is not used right as it
    expires, but it is always kept for at least 10 seconds.
    """
    lifetime = max(expires_in - 30, 10)
    _token_cache[key] = (token, time.time() + lifetime)
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_token(
    realm: str,
    service: str,
    scope: str,
    username: str | None = None,
    password: str | None = None,
) -> str | None:
    """Fetch a Bearer token from a Docker registry auth server.

    A token cached for the same (realm, service, scope, username) is reused.
    Otherwise ``realm`` is requested with the non-empty ``service`` / ``scope``
    values as query parameters, using HTTP basic auth when both ``username``
    and ``password`` are given.

    Args:
        realm: URL of the token endpoint.
        service: Value for the ``service`` query parameter; omitted when empty.
        scope: Value for the ``scope`` query parameter; omitted when empty.
        username: Optional basic-auth user name.
        password: Optional basic-auth password.

    Returns:
        The token string, or None when the request fails or the response
        carries no usable token. Failures are logged, never raised.
    """
    key = _cache_key(realm, service, scope, username)
    cached = _get_cached_token(key)
    if cached:
        return cached

    params: dict[str, str] = {}
    if service:
        params["service"] = service
    if scope:
        params["scope"] = scope

    auth = (username, password) if username and password else None

    try:
        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.get(realm, params=params, auth=auth)
            response.raise_for_status()
            data = response.json()
    except Exception as e:
        logger.warning(f"Docker token fetch failed ({realm}): {e}")
        return None

    # A JSON body that is not an object (list, string, null) has no token
    # fields; treat it like any other unusable response.
    if not isinstance(data, dict):
        logger.warning(f"Docker token response is not a JSON object ({realm})")
        return None

    token = data.get("token") or data.get("access_token")
    if not token:
        logger.warning(f"Docker token response missing token field: {data}")
        return None

    # The Docker token specification makes ``expires_in`` optional and defines
    # 60 seconds as the lifetime when it is omitted. Assuming a longer lifetime
    # risks caching a token past its real expiry. A null or malformed value is
    # handled the same way as a missing one.
    try:
        expires_in = int(data.get("expires_in") or 60)
    except (TypeError, ValueError):
        expires_in = 60

    _store_token(key, token, expires_in)
    logger.debug(f"Docker token obtained (realm={realm}, service={service}, scope={scope}, expires_in={expires_in}s)")
    return token
|
||||||
|
|
||||||
|
|
||||||
|
def parse_www_authenticate(header: str) -> tuple[str, str, str] | None:
|
||||||
|
"""Parse WWW-Authenticate: Bearer header. Returns (realm, service, scope) or None."""
|
||||||
|
m = _WWW_AUTH_RE.search(header)
|
||||||
|
if not m:
|
||||||
|
return None
|
||||||
|
return m.group("realm"), m.group("service") or "", m.group("scope") or ""
|
||||||
|
|
||||||
|
|
||||||
|
async def get_docker_token_for_response(
    www_authenticate: str,
    username: str | None = None,
    password: str | None = None,
) -> str | None:
    """Turn a ``WWW-Authenticate`` header value into a Bearer token.

    The header is parsed with ``parse_www_authenticate`` and the resulting
    realm, service and scope are handed to ``fetch_token`` together with the
    optional credentials.

    Returns:
        The token, or None when the header cannot be parsed or no token could
        be fetched.
    """
    challenge = parse_www_authenticate(www_authenticate)
    if challenge:
        realm, service, scope = challenge
        return await fetch_token(realm, service, scope, username, password)
    return None
|
||||||
Vendored
+3
@@ -0,0 +1,3 @@
|
|||||||
|
from .redis import RedisCache
|
||||||
|
|
||||||
|
__all__ = ["RedisCache"]
|
||||||
+22
-10
@@ -11,7 +11,6 @@ class RedisCache:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
self.client = redis.from_url(self.redis_url, decode_responses=True)
|
self.client = redis.from_url(self.redis_url, decode_responses=True)
|
||||||
# Test connection
|
|
||||||
self.client.ping()
|
self.client.ping()
|
||||||
self.available = True
|
self.available = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -20,7 +19,6 @@ class RedisCache:
|
|||||||
self.available = False
|
self.available = False
|
||||||
|
|
||||||
def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
|
def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
|
||||||
"""Return True if file_path matches any of the mutable patterns."""
|
|
||||||
if patterns is None:
|
if patterns is None:
|
||||||
patterns = []
|
patterns = []
|
||||||
return any(re.search(p, file_path) for p in patterns)
|
return any(re.search(p, file_path) for p in patterns)
|
||||||
@@ -32,10 +30,8 @@ class RedisCache:
|
|||||||
return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
||||||
|
|
||||||
def is_index_valid(self, remote_name: str, path: str) -> bool:
|
def is_index_valid(self, remote_name: str, path: str) -> bool:
|
||||||
"""Check if mutable file is still within its TTL window."""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
key = self.get_index_cache_key(remote_name, path)
|
key = self.get_index_cache_key(remote_name, path)
|
||||||
return self.client.exists(key) > 0
|
return self.client.exists(key) > 0
|
||||||
@@ -43,10 +39,8 @@ class RedisCache:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
|
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
|
||||||
"""Set or refresh the TTL key for a mutable file."""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
key = self.get_index_cache_key(remote_name, path)
|
key = self.get_index_cache_key(remote_name, path)
|
||||||
self.client.setex(key, ttl, str(int(time.time())))
|
self.client.setex(key, ttl, str(int(time.time())))
|
||||||
@@ -54,7 +48,6 @@ class RedisCache:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None:
|
def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None:
|
||||||
"""Persist ETag and Last-Modified for future conditional requests."""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return
|
return
|
||||||
data = {}
|
data = {}
|
||||||
@@ -70,7 +63,6 @@ class RedisCache:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def get_mutable_meta(self, remote_name: str, path: str) -> dict:
|
def get_mutable_meta(self, remote_name: str, path: str) -> dict:
|
||||||
"""Return stored ETag/Last-Modified for a mutable file, or {}."""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
@@ -86,15 +78,35 @@ class RedisCache:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def get_artifact_published_key(self, remote_name: str, path: str) -> str:
    """Build the Redis key that holds an artifact's publish timestamp.

    The key embeds the remote name and the first 16 hex characters of the
    SHA-256 digest of ``path``.
    """
    path_digest = hashlib.sha256(path.encode()).hexdigest()
    return f"pkg:published:{remote_name}:{path_digest[:16]}"
|
||||||
|
|
||||||
|
def store_artifact_published(self, remote_name: str, path: str, last_modified: str) -> None:
|
||||||
|
"""Persist the upstream Last-Modified header for a (typically immutable) artifact."""
|
||||||
|
if not self.available:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
self.client.set(self.get_artifact_published_key(remote_name, path), last_modified)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_artifact_published(self, remote_name: str, path: str) -> str | None:
|
||||||
|
"""Return the stored Last-Modified string for an artifact, or None."""
|
||||||
|
if not self.available:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return self.client.get(self.get_artifact_published_key(remote_name, path))
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
|
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
|
||||||
"""Remove an expired mutable file from S3 and clear its Redis meta."""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from .config import ConfigManager
|
from ..config import ConfigManager
|
||||||
|
|
||||||
config_path = os.environ.get("CONFIG_PATH")
|
config_path = os.environ.get("CONFIG_PATH")
|
||||||
if config_path:
|
if config_path:
|
||||||
+84
-14
@@ -1,3 +1,4 @@
|
|||||||
|
import glob
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
@@ -30,31 +31,87 @@ _PACKAGE_MUTABLE_PATTERNS: dict[str, list[str]] = {
|
|||||||
|
|
||||||
|
|
||||||
class ConfigManager:
|
class ConfigManager:
|
||||||
def __init__(self, config_file: str = "remotes.yaml"):
|
def __init__(self, config_path: str = "remotes.yaml"):
|
||||||
self.config_file = config_file
|
self.config_path = config_path
|
||||||
self._last_modified = 0
|
self._config_dir: str | None = None
|
||||||
|
self._last_modified: float = 0.0
|
||||||
self.config = self._load_config()
|
self.config = self._load_config()
|
||||||
|
|
||||||
def _load_config(self) -> dict:
|
def _load_single_file(self, path: str) -> dict:
|
||||||
try:
|
try:
|
||||||
with open(self.config_file) as f:
|
with open(path) as f:
|
||||||
if self.config_file.endswith(".yaml") or self.config_file.endswith(".yml"):
|
if path.endswith((".yaml", ".yml")):
|
||||||
return yaml.safe_load(f)
|
return yaml.safe_load(f) or {}
|
||||||
else:
|
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _merge(base: dict, overlay: dict) -> dict:
|
||||||
|
result = {**base}
|
||||||
|
for key, value in overlay.items():
|
||||||
|
if key == "remotes" and isinstance(base.get("remotes"), dict) and isinstance(value, dict):
|
||||||
|
result["remotes"] = {**base.get("remotes", {}), **value}
|
||||||
|
else:
|
||||||
|
result[key] = value
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _load_from_dir(self, dir_path: str) -> dict:
|
||||||
|
merged: dict = {}
|
||||||
|
files = sorted(glob.glob(os.path.join(dir_path, "*.yaml")) + glob.glob(os.path.join(dir_path, "*.yml")))
|
||||||
|
for path in files:
|
||||||
|
merged = self._merge(merged, self._load_single_file(path))
|
||||||
|
return merged
|
||||||
|
|
||||||
|
def _load_config(self) -> dict:
|
||||||
|
self._config_dir = None
|
||||||
|
|
||||||
|
if os.path.isdir(self.config_path):
|
||||||
|
return self._load_from_dir(self.config_path) or {"remotes": {}}
|
||||||
|
|
||||||
|
config = self._load_single_file(self.config_path)
|
||||||
|
if not config:
|
||||||
return {"remotes": {}}
|
return {"remotes": {}}
|
||||||
|
|
||||||
def _check_reload(self) -> None:
|
config_dir = config.pop("config_dir", None)
|
||||||
"""Check if config file has been modified and reload if needed"""
|
if config_dir:
|
||||||
try:
|
if not os.path.isabs(config_dir):
|
||||||
import os
|
config_dir = os.path.join(os.path.dirname(os.path.abspath(self.config_path)), config_dir)
|
||||||
|
self._config_dir = config_dir
|
||||||
|
config = self._merge(config, self._load_from_dir(config_dir))
|
||||||
|
|
||||||
current_modified = os.path.getmtime(self.config_file)
|
return config
|
||||||
|
|
||||||
|
def _file_mtimes(self) -> list[float]:
|
||||||
|
mtimes: list[float] = []
|
||||||
|
if os.path.isdir(self.config_path):
|
||||||
|
for f in glob.glob(os.path.join(self.config_path, "*.yaml")) + glob.glob(os.path.join(self.config_path, "*.yml")):
|
||||||
|
try:
|
||||||
|
mtimes.append(os.path.getmtime(f))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
mtimes.append(os.path.getmtime(self.config_path))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if self._config_dir and os.path.isdir(self._config_dir):
|
||||||
|
for f in glob.glob(os.path.join(self._config_dir, "*.yaml")) + glob.glob(os.path.join(self._config_dir, "*.yml")):
|
||||||
|
try:
|
||||||
|
mtimes.append(os.path.getmtime(f))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return mtimes
|
||||||
|
|
||||||
|
def _check_reload(self) -> None:
|
||||||
|
try:
|
||||||
|
current_modified = max(self._file_mtimes(), default=0.0)
|
||||||
if current_modified > self._last_modified:
|
if current_modified > self._last_modified:
|
||||||
self._last_modified = current_modified
|
self._last_modified = current_modified
|
||||||
self.config = self._load_config()
|
self.config = self._load_config()
|
||||||
print(f"Config reloaded from {self.config_file}")
|
print(f"Config reloaded from {self.config_path}")
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -159,3 +216,16 @@ class ConfigManager:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
return remote_config.get("cache", {})
|
return remote_config.get("cache", {})
|
||||||
|
|
||||||
|
def get_quarantine_config(self, remote_name: str) -> tuple[bool, int]:
|
||||||
|
"""Return (enabled, quarantine_days) for a remote.
|
||||||
|
|
||||||
|
When enabled=True and quarantine_days>0, immutable artifacts published
|
||||||
|
within the last quarantine_days days are blocked with a 404.
|
||||||
|
"""
|
||||||
|
remote_config = self.get_remote_config(remote_name)
|
||||||
|
if not remote_config:
|
||||||
|
return False, 0
|
||||||
|
enabled = bool(remote_config.get("quarantine_new", False))
|
||||||
|
days = int(remote_config.get("quarantine_days", 0))
|
||||||
|
return enabled, days
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .postgres import DatabaseManager
|
||||||
|
|
||||||
|
__all__ = ["DatabaseManager"]
|
||||||
@@ -9,7 +9,6 @@ class DatabaseManager:
|
|||||||
self._init_database()
|
self._init_database()
|
||||||
|
|
||||||
def _init_database(self):
|
def _init_database(self):
|
||||||
"""Initialize database connection and create schema if needed"""
|
|
||||||
try:
|
try:
|
||||||
self.connection = psycopg2.connect(self.db_url)
|
self.connection = psycopg2.connect(self.db_url)
|
||||||
self.connection.autocommit = True
|
self.connection.autocommit = True
|
||||||
@@ -21,10 +20,8 @@ class DatabaseManager:
|
|||||||
self.available = False
|
self.available = False
|
||||||
|
|
||||||
def _create_schema(self):
|
def _create_schema(self):
|
||||||
"""Create tables if they don't exist"""
|
|
||||||
try:
|
try:
|
||||||
with self.connection.cursor() as cursor:
|
with self.connection.cursor() as cursor:
|
||||||
# Create table to map S3 keys to remote names
|
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS artifact_mappings (
|
CREATE TABLE IF NOT EXISTS artifact_mappings (
|
||||||
id SERIAL PRIMARY KEY,
|
id SERIAL PRIMARY KEY,
|
||||||
@@ -51,7 +48,6 @@ class DatabaseManager:
|
|||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# Create indexes separately
|
|
||||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)")
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)")
|
||||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)")
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)")
|
||||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)")
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)")
|
||||||
@@ -61,7 +57,6 @@ class DatabaseManager:
|
|||||||
print(f"Error creating schema: {e}")
|
print(f"Error creating schema: {e}")
|
||||||
|
|
||||||
def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int):
|
def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int):
|
||||||
"""Record mapping between S3 key and remote"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -83,7 +78,6 @@ class DatabaseManager:
|
|||||||
print(f"Error recording artifact mapping: {e}")
|
print(f"Error recording artifact mapping: {e}")
|
||||||
|
|
||||||
def get_storage_by_remote(self) -> dict[str, int]:
|
def get_storage_by_remote(self) -> dict[str, int]:
|
||||||
"""Get storage size breakdown by remote from database"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@@ -101,7 +95,6 @@ class DatabaseManager:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
def get_remote_for_s3_key(self, s3_key: str) -> str | None:
|
def get_remote_for_s3_key(self, s3_key: str) -> str | None:
|
||||||
"""Get remote name for given S3 key"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -126,7 +119,6 @@ class DatabaseManager:
|
|||||||
sha256_sum: str,
|
sha256_sum: str,
|
||||||
content_type: str = None,
|
content_type: str = None,
|
||||||
):
|
):
|
||||||
"""Add a file to local repository"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -153,7 +145,6 @@ class DatabaseManager:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def get_local_file_metadata(self, repository_name: str, file_path: str):
|
def get_local_file_metadata(self, repository_name: str, file_path: str):
|
||||||
"""Get metadata for a local file"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -185,7 +176,6 @@ class DatabaseManager:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def list_local_files(self, repository_name: str, prefix: str = ""):
|
def list_local_files(self, repository_name: str, prefix: str = ""):
|
||||||
"""List files in local repository with optional path prefix"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@@ -229,7 +219,6 @@ class DatabaseManager:
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
def delete_local_file(self, repository_name: str, file_path: str):
|
def delete_local_file(self, repository_name: str, file_path: str):
|
||||||
"""Delete a file from local repository"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -251,7 +240,6 @@ class DatabaseManager:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def file_exists(self, repository_name: str, file_path: str):
|
def file_exists(self, repository_name: str, file_path: str):
|
||||||
"""Check if file exists in local repository"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -1,96 +1,19 @@
|
|||||||
import logging
|
from .auth.docker import (
|
||||||
import re
|
_cache_key,
|
||||||
import time
|
_get_cached_token,
|
||||||
|
_store_token,
|
||||||
import httpx
|
_token_cache,
|
||||||
|
fetch_token,
|
||||||
logger = logging.getLogger(__name__)
|
get_docker_token_for_response,
|
||||||
|
parse_www_authenticate,
|
||||||
# In-memory token cache: key -> (token, expires_at)
|
|
||||||
_token_cache: dict[str, tuple[str, float]] = {}
|
|
||||||
|
|
||||||
_WWW_AUTH_RE = re.compile(
|
|
||||||
r'Bearer\s+realm="(?P<realm>[^"]+)"'
|
|
||||||
r'(?:,service="(?P<service>[^"]*)")?'
|
|
||||||
r'(?:,scope="(?P<scope>[^"]*)")?',
|
|
||||||
re.IGNORECASE,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
def _cache_key(realm: str, service: str, scope: str, username: str | None) -> str:
|
"_cache_key",
|
||||||
return f"{realm}|{service}|{scope}|{username or ''}"
|
"_get_cached_token",
|
||||||
|
"_store_token",
|
||||||
|
"_token_cache",
|
||||||
def _get_cached_token(key: str) -> str | None:
|
"fetch_token",
|
||||||
entry = _token_cache.get(key)
|
"get_docker_token_for_response",
|
||||||
if entry and entry[1] > time.time():
|
"parse_www_authenticate",
|
||||||
return entry[0]
|
]
|
||||||
_token_cache.pop(key, None)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _store_token(key: str, token: str, expires_in: int) -> None:
|
|
||||||
# Expire 30s early to avoid using a token right as it expires
|
|
||||||
_token_cache[key] = (token, time.time() + max(expires_in - 30, 10))
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_token(
|
|
||||||
realm: str,
|
|
||||||
service: str,
|
|
||||||
scope: str,
|
|
||||||
username: str | None = None,
|
|
||||||
password: str | None = None,
|
|
||||||
) -> str | None:
|
|
||||||
"""Fetch a Bearer token from a Docker registry auth server."""
|
|
||||||
key = _cache_key(realm, service, scope, username)
|
|
||||||
cached = _get_cached_token(key)
|
|
||||||
if cached:
|
|
||||||
return cached
|
|
||||||
|
|
||||||
params: dict[str, str] = {}
|
|
||||||
if service:
|
|
||||||
params["service"] = service
|
|
||||||
if scope:
|
|
||||||
params["scope"] = scope
|
|
||||||
|
|
||||||
auth = (username, password) if username and password else None
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
|
||||||
response = await client.get(realm, params=params, auth=auth)
|
|
||||||
response.raise_for_status()
|
|
||||||
data = response.json()
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Docker token fetch failed ({realm}): {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
token = data.get("token") or data.get("access_token")
|
|
||||||
if not token:
|
|
||||||
logger.warning(f"Docker token response missing token field: {data}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
expires_in = int(data.get("expires_in", 300))
|
|
||||||
_store_token(key, token, expires_in)
|
|
||||||
logger.debug(f"Docker token obtained (realm={realm}, service={service}, scope={scope}, expires_in={expires_in}s)")
|
|
||||||
return token
|
|
||||||
|
|
||||||
|
|
||||||
def parse_www_authenticate(header: str) -> tuple[str, str, str] | None:
    """Extract ``(realm, service, scope)`` from a Bearer WWW-Authenticate value.

    Returns ``None`` when the header does not match the expected pattern.
    A missing service or scope is reported as an empty string.
    """
    match = _WWW_AUTH_RE.search(header)
    if match is None:
        return None
    realm = match.group("realm")
    service = match.group("service") or ""
    scope = match.group("scope") or ""
    return realm, service, scope
|
|
||||||
|
|
||||||
|
|
||||||
async def get_docker_token_for_response(
    www_authenticate: str,
    username: str | None = None,
    password: str | None = None,
) -> str | None:
    """Turn a WWW-Authenticate challenge into a Bearer token.

    Returns ``None`` when the challenge cannot be parsed or when the token
    fetch itself yields nothing.
    """
    challenge = parse_www_authenticate(www_authenticate)
    if challenge is None:
        return None
    # ``challenge`` is the (realm, service, scope) triple, in that order.
    return await fetch_token(*challenge, username, password)
|
|
||||||
|
|||||||
+51
-776
@@ -1,14 +1,8 @@
|
|||||||
import base64
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import httpx
|
from fastapi import FastAPI, File, Query, Request, UploadFile
|
||||||
from fastapi import FastAPI, File, HTTPException, Query, Request, Response, UploadFile
|
from fastapi.responses import PlainTextResponse
|
||||||
from fastapi.responses import JSONResponse, PlainTextResponse
|
|
||||||
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
|
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
@@ -17,58 +11,45 @@ try:
|
|||||||
|
|
||||||
__version__ = version("artifactapi")
|
__version__ = version("artifactapi")
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# Fallback for development when package isn't installed
|
|
||||||
__version__ = "dev"
|
__version__ = "dev"
|
||||||
|
|
||||||
|
from .artifact import discovery, flush, local, proxy
|
||||||
|
from .artifact import docker as docker_handler
|
||||||
from .cache import RedisCache
|
from .cache import RedisCache
|
||||||
from .config import ConfigManager
|
from .config import ConfigManager
|
||||||
from .database import DatabaseManager
|
from .database import DatabaseManager
|
||||||
from .docker_auth import get_docker_token_for_response
|
|
||||||
from .metrics import MetricsManager
|
from .metrics import MetricsManager
|
||||||
from .storage import S3Storage
|
from .storage import S3Storage
|
||||||
|
|
||||||
|
# Logging is configured once, at import time.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

app = FastAPI(title="Artifact Storage API", version=__version__)

# The configuration file location must come from the environment.
if not (config_path := os.environ.get("CONFIG_PATH")):
    raise ValueError("CONFIG_PATH environment variable is required")
config = ConfigManager(config_path)

# Per-backend settings taken from the loaded configuration.
s3_config, redis_config, db_config = (
    config.get_s3_config(),
    config.get_redis_config(),
    config.get_database_config(),
)

# Service objects shared by the request handlers.
storage = S3Storage(**s3_config)
cache = RedisCache(redis_config["url"])
database = DatabaseManager(db_config["url"])
metrics = MetricsManager(cache, database)
|
||||||
|
|
||||||
|
|
||||||
class ArtifactRequest(BaseModel):
    # Request payload with two required string fields.
    remote: str
    include_pattern: str
|
||||||
|
|
||||||
|
|
||||||
class UpstreamUnreachable(Exception):
    """Signal that the upstream backend could not be contacted.

    Used for network failures and timeouts, as opposed to the upstream
    answering with an HTTP response.
    """
|
|
||||||
|
|
||||||
|
|
||||||
# Logging is configured once, at import time.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

app = FastAPI(title="Artifact Storage API", version=__version__)

# The configuration file location must come from the environment.
if not (config_path := os.environ.get("CONFIG_PATH")):
    raise ValueError("CONFIG_PATH environment variable is required")
config = ConfigManager(config_path)

# Per-backend settings taken from the loaded configuration.
s3_config, redis_config, db_config = (
    config.get_s3_config(),
    config.get_redis_config(),
    config.get_database_config(),
)

# Service objects shared by the request handlers.
storage = S3Storage(**s3_config)
cache = RedisCache(redis_config["url"])
database = DatabaseManager(db_config["url"])
metrics = MetricsManager(cache, database)
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
def read_root():
    # Service banner: name, version and the names of the configured remotes.
    config._check_reload()
    remote_names = list(config.config.get("remotes", {}))
    return {
        "message": "Artifact Storage API",
        "version": app.version,
        "remotes": remote_names,
    }
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
@@ -76,772 +57,66 @@ def health_check():
|
|||||||
return {"status": "healthy"}
|
return {"status": "healthy"}
|
||||||
|
|
||||||
|
|
||||||
|
# Substrings that mark a configuration key as holding a credential.
_SECRET_KEY_MARKERS = ("password", "secret", "token")


def _redact_secrets(value):
    """Return a copy of ``value`` with secret-looking mapping values masked.

    Dictionaries and lists are walked recursively. A non-empty value whose key
    contains one of ``_SECRET_KEY_MARKERS`` (case-insensitive) is replaced by
    ``"***"``; everything else is returned unchanged.
    """
    if isinstance(value, dict):
        return {
            key: "***"
            if item and isinstance(key, str) and any(marker in key.lower() for marker in _SECRET_KEY_MARKERS)
            else _redact_secrets(item)
            for key, item in value.items()
        }
    if isinstance(value, list):
        return [_redact_secrets(item) for item in value]
    return value


@app.get("/config")
def get_config():
    """Return the loaded configuration with credentials masked.

    Remote definitions can carry ``username``/``password`` entries, so the raw
    configuration must not be returned verbatim from an HTTP endpoint.
    NOTE(review): credentials embedded in URL values are not masked here.
    """
    return _redact_secrets(config.config)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/metrics")
def get_metrics(json: bool | None = Query(False, description="Return JSON format instead of Prometheus")):
    # Metrics are collected on every call; the ``json`` flag only selects
    # whether the collected value or the Prometheus text export is returned.
    config._check_reload()
    snapshot = metrics.get_metrics(storage, config)
    if json:
        return snapshot
    exposition = generate_latest().decode("utf-8")
    return PlainTextResponse(exposition, media_type=CONTENT_TYPE_LATEST)
|
||||||
|
|
||||||
|
|
||||||
@app.put("/cache/flush")
|
@app.put("/cache/flush")
|
||||||
def flush_cache(
|
def flush_cache(
|
||||||
remote: str = Query(default=None, description="Specific remote to flush (optional)"),
|
remote: str = Query(default=None, description="Specific remote to flush (optional)"),
|
||||||
cache_type: str = Query(default="all", description="Type to flush: 'all', 'index', 'files', 'metrics'"),
|
cache_type: str = Query(default="all", description="Type to flush: 'all', 'index', 'files', 'metrics'"),
|
||||||
):
|
):
|
||||||
"""Flush cache entries for specified remote or all remotes"""
|
return flush.handle(remote, cache_type, cache, storage)
|
||||||
try:
|
|
||||||
result = {"remote": remote, "cache_type": cache_type, "flushed": {"redis_keys": 0, "s3_objects": 0, "operations": []}}
|
|
||||||
|
|
||||||
# Flush Redis entries based on cache_type
|
|
||||||
if cache_type in ["all", "index", "metrics"] and cache.available and cache.client:
|
|
||||||
patterns = []
|
|
||||||
|
|
||||||
if cache_type in ["all", "index"]:
|
|
||||||
if remote:
|
|
||||||
patterns.append(f"index:{remote}:*")
|
|
||||||
patterns.append(f"mutable:meta:{remote}:*")
|
|
||||||
else:
|
|
||||||
patterns.append("index:*")
|
|
||||||
patterns.append("mutable:meta:*")
|
|
||||||
|
|
||||||
if cache_type in ["all", "metrics"]:
|
|
||||||
if remote:
|
|
||||||
patterns.append(f"metrics:*:{remote}")
|
|
||||||
else:
|
|
||||||
patterns.append("metrics:*")
|
|
||||||
|
|
||||||
for pattern in patterns:
|
|
||||||
keys = cache.client.keys(pattern)
|
|
||||||
if keys:
|
|
||||||
cache.client.delete(*keys)
|
|
||||||
result["flushed"]["redis_keys"] += len(keys)
|
|
||||||
logger.info(f"Cache flush: Deleted {len(keys)} Redis keys matching '{pattern}'")
|
|
||||||
|
|
||||||
if result["flushed"]["redis_keys"] > 0:
|
|
||||||
result["flushed"]["operations"].append(f"Deleted {result['flushed']['redis_keys']} Redis keys")
|
|
||||||
|
|
||||||
# Flush S3 objects if requested
|
|
||||||
if cache_type in ["all", "files"]:
|
|
||||||
try:
|
|
||||||
# Use prefix filtering for remote-specific deletion
|
|
||||||
list_params = {"Bucket": storage.bucket}
|
|
||||||
if remote:
|
|
||||||
list_params["Prefix"] = f"{remote}/"
|
|
||||||
|
|
||||||
response = storage.client.list_objects_v2(**list_params)
|
|
||||||
if "Contents" in response:
|
|
||||||
objects_to_delete = [obj["Key"] for obj in response["Contents"]]
|
|
||||||
|
|
||||||
for key in objects_to_delete:
|
|
||||||
try:
|
|
||||||
storage.client.delete_object(Bucket=storage.bucket, Key=key)
|
|
||||||
result["flushed"]["s3_objects"] += 1
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to delete S3 object {key}: {e}")
|
|
||||||
|
|
||||||
if objects_to_delete:
|
|
||||||
scope = f" for remote '{remote}'" if remote else ""
|
|
||||||
result["flushed"]["operations"].append(f"Deleted {len(objects_to_delete)} S3 objects{scope}")
|
|
||||||
logger.info(f"Cache flush: Deleted {len(objects_to_delete)} S3 objects{scope}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
result["flushed"]["operations"].append(f"S3 flush failed: {str(e)}")
|
|
||||||
logger.error(f"Cache flush S3 error: {e}")
|
|
||||||
|
|
||||||
if not result["flushed"]["operations"]:
|
|
||||||
result["flushed"]["operations"].append("No cache entries found to flush")
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Cache flush error: {e}")
|
|
||||||
raise HTTPException(status_code=500, detail=f"Cache flush failed: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
async def construct_remote_url(remote_name: str, path: str) -> str:
    """Build the upstream URL for ``path`` on the remote ``remote_name``.

    Docker remotes get the registry ``v2/`` prefix inserted, e.g.
    ``library/nginx/manifests/latest`` -> ``<base_url>/v2/library/nginx/manifests/latest``.

    Raises:
        HTTPException: 404 when the remote is not configured, 500 when it has
            no ``base_url``.
    """
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")

    base_url = remote_config.get("base_url")
    if not base_url:
        raise HTTPException(status_code=500, detail=f"No base_url configured for remote '{remote_name}'")

    # A trailing slash in the configured base would otherwise produce "//" in
    # the final URL.
    base_url = base_url.rstrip("/")

    if remote_config.get("package") == "docker":
        return f"{base_url}/v2/{path}"
    return f"{base_url}/{path}"
|
|
||||||
|
|
||||||
|
|
||||||
async def check_artifact_patterns(remote_name: str, repo_path: str, file_path: str, full_path: str) -> bool:
    """Decide whether an artifact may be served for this remote.

    Mutable files are always allowed. Otherwise the artifact is allowed when
    no immutable patterns are configured, or when at least one pattern matches
    either ``file_path`` or ``full_path``.
    """
    candidates = (file_path, full_path)

    mutable_patterns = config.get_mutable_patterns(remote_name)
    if any(cache.is_mutable_file(candidate, mutable_patterns) for candidate in candidates):
        return True

    include_patterns = config.get_immutable_patterns(remote_name, repo_path)
    if not include_patterns:
        # Nothing configured means nothing is filtered out.
        return True

    # Both path forms are tried because patterns may target either one.
    return any(
        re.search(pattern, candidate)
        for pattern in include_patterns
        for candidate in candidates
    )
|
|
||||||
|
|
||||||
|
|
||||||
async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
    """Download ``url`` into storage unless the object is already stored.

    Returns a dict whose ``status`` is ``"already_cached"``, ``"cached"`` or
    ``"error"``. Any exception raised while fetching or uploading is reported
    through the ``"error"`` result instead of propagating.
    """
    key = storage.get_object_key(remote_name, path)

    if storage.exists(key):
        logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})")
        return {"url": url, "cached_url": storage.get_url(key), "status": "already_cached"}

    try:
        remote_config = config.get_remote_config(remote_name) or {}
        username = remote_config.get("username")
        password = remote_config.get("password")
        is_docker = remote_config.get("package") == "docker" or "/v2/" in url

        headers = {}
        if is_docker:
            # Registries pick the manifest representation from the Accept header.
            if "/manifests/" in url:
                headers["Accept"] = ",".join(
                    (
                        "application/vnd.docker.distribution.manifest.v2+json",
                        "application/vnd.oci.image.manifest.v1+json",
                        "application/vnd.oci.image.index.v1+json",
                        "application/vnd.docker.distribution.manifest.list.v2+json",
                    )
                )
            elif "/blobs/" in url:
                headers["Accept"] = "application/octet-stream"
        elif username and password:
            credentials = f"{username}:{password}".encode()
            headers["Authorization"] = "Basic " + base64.b64encode(credentials).decode()

        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.get(url, headers=headers)

            # A 401 from a Docker registry is a Bearer challenge: obtain a
            # token and repeat the request once with it.
            if is_docker and response.status_code == 401:
                challenge = response.headers.get("WWW-Authenticate", "")
                token = await get_docker_token_for_response(challenge, username, password)
                if token:
                    headers["Authorization"] = f"Bearer {token}"
                    response = await client.get(url, headers=headers)

            response.raise_for_status()

        storage_path = storage.upload(key, response.content)
        logger.info(f"Cache ADD SUCCESS: {url} (size: {len(response.content)} bytes, key: {key})")

        return {
            "url": url,
            "cached_url": storage.get_url(key),
            "storage_path": storage_path,
            "size": len(response.content),
            "status": "cached",
            "etag": response.headers.get("ETag"),
            "last_modified": response.headers.get("Last-Modified"),
        }
    except Exception as e:
        return {"url": url, "status": "error", "error": str(e)}
|
|
||||||
|
|
||||||
|
|
||||||
def _basic_auth_header(remote_cfg: dict) -> dict[str, str]:
|
|
||||||
username = remote_cfg.get("username")
|
|
||||||
password = remote_cfg.get("password")
|
|
||||||
if username and password:
|
|
||||||
token = base64.b64encode(f"{username}:{password}".encode()).decode()
|
|
||||||
return {"Authorization": f"Basic {token}"}
|
|
||||||
return {}
|
|
||||||
|
|
||||||
|
|
||||||
async def _upstream_reachable(url: str, auth_headers: dict | None = None) -> bool:
    """Probe ``url`` with a HEAD request limited to 10 seconds.

    Returns ``False`` only for network errors and timeouts; a completed
    request, or any other kind of failure, counts as reachable.
    """
    probe_headers = auth_headers or {}
    try:
        async with httpx.AsyncClient(follow_redirects=True) as client:
            await client.head(url, headers=probe_headers, timeout=10.0)
    except (httpx.NetworkError, httpx.TimeoutException):
        return False
    except Exception:
        # Other failures are not treated as evidence that the backend is down.
        return True
    return True
|
|
||||||
|
|
||||||
|
|
||||||
async def check_upstream_changed(remote_url: str, remote_name: str, path: str, auth_headers: dict | None = None) -> bool:
    """Ask the upstream, via a conditional HEAD, whether a file has changed.

    Returns ``False`` only when the upstream answers 304. Without stored
    validators (ETag / Last-Modified) the file is reported as changed.

    Raises:
        UpstreamUnreachable: on a network error or timeout.
    """
    meta = cache.get_mutable_meta(remote_name, path)
    if not meta:
        return True

    etag = meta.get("etag")
    last_modified = meta.get("last_modified")
    if not (etag or last_modified):
        # Nothing to validate against, so a conditional request is pointless.
        return True

    headers = dict(auth_headers or {})
    if etag:
        headers["If-None-Match"] = etag
    if last_modified:
        headers["If-Modified-Since"] = last_modified

    try:
        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.head(remote_url, headers=headers)
    except (httpx.NetworkError, httpx.TimeoutException) as exc:
        raise UpstreamUnreachable(str(exc)) from exc
    return response.status_code != 304
|
|
||||||
|
|
||||||
|
|
||||||
async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) -> bool:
    """Decide what to do with a mutable file whose cache TTL has run out.

    Returns ``True`` when the cached copy should keep being served (its TTL is
    renewed), ``False`` after the cached copy has been removed.
    """
    mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
    remote_cfg = config.get_remote_config(remote_name) or {}
    auth = _basic_auth_header(remote_cfg)

    def keep_stale_copy() -> bool:
        # Upstream is unreachable: keep serving the stale copy for another TTL.
        cache.mark_index_cached(remote_name, path, mutable_ttl)
        logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)")
        return True

    # Revalidation applies only when enabled for the remote and the path
    # matches one of its user-defined mutable patterns.
    revalidate = remote_cfg.get("check_mutable_updates", False) and cache.is_mutable_file(
        path, config.get_user_mutable_patterns(remote_name)
    )

    if revalidate:
        try:
            changed = await check_upstream_changed(remote_url, remote_name, path, auth)
        except UpstreamUnreachable:
            return keep_stale_copy()
        if not changed:
            cache.mark_index_cached(remote_name, path, mutable_ttl)
            logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)")
            return True
        logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")
    elif not await _upstream_reachable(remote_url, auth):
        return keep_stale_copy()
    else:
        logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")

    cache.cleanup_expired_index(storage, remote_name, path)
    return False
|
|
||||||
|
|
||||||
|
|
||||||
def _get_content_type(filename: str) -> str:
|
|
||||||
if filename.endswith((".tar.gz", ".tgz")):
|
|
||||||
return "application/gzip"
|
|
||||||
if filename.endswith(".zip") or filename.endswith(".whl"):
|
|
||||||
return "application/zip"
|
|
||||||
if filename.endswith(".exe"):
|
|
||||||
return "application/x-msdownload"
|
|
||||||
if filename.endswith(".rpm"):
|
|
||||||
return "application/x-rpm"
|
|
||||||
if filename.endswith(".xml"):
|
|
||||||
return "application/xml"
|
|
||||||
if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
|
||||||
return "application/gzip"
|
|
||||||
if filename.endswith((".yaml", ".yml")):
|
|
||||||
return "text/yaml"
|
|
||||||
return "application/octet-stream"
|
|
||||||
|
|
||||||
|
|
||||||
def _resolve_content(
    data: bytes,
    path: str,
    filename: str,
    remote_config: dict,
    request: Request,
    remote_name: str = "",
) -> tuple[bytes, str]:
    """Prepare a cached artifact for the response.

    PyPI "simple" pages, npm metadata (anything not ending in ``.tgz``) and
    Helm ``index.yaml`` files have their upstream base URL replaced by this
    proxy's own URL. Everything else is returned untouched with a content
    type derived from ``filename``.
    """
    package = remote_config.get("package")

    def via_proxy(upstream_base: str, target_remote: str) -> bytes:
        # Replace the upstream base URL with this proxy's route to ``target_remote``.
        proxy_base = str(request.base_url).rstrip("/")
        return data.replace(
            upstream_base.rstrip("/").encode(),
            f"{proxy_base}/api/v1/remote/{target_remote}".encode(),
        )

    if package == "pypi" and "simple/" in path:
        rewritten = via_proxy(
            remote_config.get("pypi_files_url", "https://files.pythonhosted.org"),
            remote_config.get("pypi_files_remote", "pypi-files"),
        )
        return rewritten, "text/html; charset=utf-8"

    if package == "npm" and not path.endswith(".tgz"):
        rewritten = via_proxy(
            remote_config.get("npm_files_url", "https://registry.npmjs.org"),
            remote_config.get("npm_files_remote", "npm-files"),
        )
        return rewritten, "application/json"

    if package == "helm" and filename == "index.yaml":
        return via_proxy(remote_config.get("base_url", ""), remote_name), "text/yaml"

    return data, _get_content_type(filename)
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/v1/remote/{remote_name}/{path:path}")
async def get_artifact(request: Request, remote_name: str, path: str):
    # Serve one artifact: straight from storage for local repositories,
    # otherwise from the cache, fetching it from the upstream on a miss.
    remote_config = config.get_remote_config(remote_name)
    if not remote_config:
        raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")

    filename = os.path.basename(path)

    # Local repositories: look the file up in the database, read it from storage.
    if remote_config.get("type") == "local":
        metadata = database.get_local_file_metadata(remote_name, path)
        if not metadata:
            raise HTTPException(status_code=404, detail="File not found")

        content = storage.download_object(metadata["s3_key"])
        if content is None:
            raise HTTPException(status_code=500, detail="File not accessible")

        return Response(
            content=content,
            media_type=metadata.get("content_type", "application/octet-stream"),
            headers={"Content-Disposition": f"attachment; filename={filename}"},
        )

    # The first two path segments form the repository path used for pattern
    # lookup; the remainder is the file path within it.
    segments = path.split("/")
    if len(segments) < 2:
        repo_path = file_path = path
    else:
        repo_path = "/".join(segments[:2])
        file_path = "/".join(segments[2:])

    allowed = await check_artifact_patterns(remote_name, repo_path, file_path, path)
    if not allowed:
        logger.info(f"PATTERN BLOCKED: {remote_name}/{path} - not matching include patterns")
        raise HTTPException(status_code=403, detail="Artifact not allowed by configuration patterns")

    remote_url = await construct_remote_url(remote_name, path)

    object_key = storage.get_object_key(remote_name, path)
    cached_key = object_key if storage.exists(object_key) else None

    is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name))

    # A cached mutable file is only served while its index entry is valid, or
    # when the expiry handler decides the stored copy can still be used.
    if cached_key and is_mutable and not cache.is_index_valid(remote_name, path):
        still_usable = await handle_expired_mutable(remote_name, path, remote_url)
        if not still_usable:
            cached_key = None

    if cached_key:
        try:
            artifact_data = storage.download_object(cached_key)
            artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)

            logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})")

            metrics.record_cache_hit(remote_name, len(artifact_data))
            database.record_artifact_mapping(cached_key, remote_name, path, len(artifact_data))

            return Response(
                content=artifact_data,
                media_type=content_type,
                headers={
                    "Content-Disposition": f"attachment; filename={filename}",
                    "X-Artifact-Source": "cache",
                    "X-Artifact-Size": str(len(artifact_data)),
                },
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error retrieving cached artifact: {str(e)}")

    # Cache miss: fetch from the upstream into storage, then serve from storage.
    logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
    result = await cache_single_artifact(remote_url, remote_name, path)
    status = result["status"]

    if status == "error":
        logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}")
        raise HTTPException(status_code=502, detail=f"Failed to fetch artifact: {result['error']}")

    if status == "cached" and is_mutable:
        mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
        cache.mark_index_cached(remote_name, path, mutable_ttl)
        logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
        # NOTE(review): nesting of the validator storage under the mutable
        # branch is assumed; confirm against the original indentation.
        etag, last_modified = result.get("etag"), result.get("last_modified")
        if etag or last_modified:
            cache.store_mutable_meta(remote_name, path, etag, last_modified)

    try:
        cache_key = storage.get_object_key(remote_name, path)
        artifact_data = storage.download_object(cache_key)
        artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)

        metrics.record_cache_miss(remote_name, len(artifact_data))
        database.record_artifact_mapping(cache_key, remote_name, path, len(artifact_data))

        return Response(
            content=artifact_data,
            media_type=content_type,
            headers={
                "Content-Disposition": f"attachment; filename={filename}",
                "X-Artifact-Source": "remote",
                "X-Artifact-Size": str(len(artifact_data)),
            },
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error serving artifact: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/v2/")
|
@app.get("/v2/")
|
||||||
async def docker_v2_ping():
|
async def docker_v2_ping():
|
||||||
return Response(
|
return docker_handler.ping()
|
||||||
content="{}",
|
|
||||||
media_type="application/json",
|
|
||||||
headers={"Docker-Distribution-Api-Version": "registry/2.0"},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.api_route("/v2/{remote_name}/{path:path}", methods=["GET", "HEAD"])
|
@app.api_route("/v2/{remote_name}/{path:path}", methods=["GET", "HEAD"])
|
||||||
async def docker_v2_proxy(request: Request, remote_name: str, path: str):
|
async def docker_v2_proxy(request: Request, remote_name: str, path: str):
|
||||||
remote_config = config.get_remote_config(remote_name)
|
return await docker_handler.proxy(request, remote_name, path, storage, cache, config, metrics)
|
||||||
if not remote_config:
|
|
||||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
|
||||||
if remote_config.get("package") != "docker":
|
|
||||||
raise HTTPException(status_code=400, detail=f"Remote '{remote_name}' is not a docker remote")
|
|
||||||
|
|
||||||
# Check immutable_patterns against the image name (e.g. "library/nginx")
|
|
||||||
patterns = config.get_immutable_patterns(remote_name, "")
|
|
||||||
if patterns:
|
|
||||||
path_parts = path.split("/")
|
|
||||||
image_name = "/".join(path_parts[:2]) if len(path_parts) >= 2 else path
|
|
||||||
if not any(re.search(p, path) or re.search(p, image_name) for p in patterns):
|
|
||||||
logger.info(f"PATTERN BLOCKED: {remote_name}/{path}")
|
|
||||||
raise HTTPException(status_code=403, detail="Image not allowed by configuration patterns")
|
|
||||||
|
|
||||||
remote_url = await construct_remote_url(remote_name, path)
|
|
||||||
|
|
||||||
cached_key = storage.get_object_key(remote_name, path)
|
|
||||||
if not storage.exists(cached_key):
|
|
||||||
cached_key = None
|
|
||||||
|
|
||||||
is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name))
|
|
||||||
|
|
||||||
if cached_key and is_mutable:
|
|
||||||
if not cache.is_index_valid(remote_name, path):
|
|
||||||
if not await handle_expired_mutable(remote_name, path, remote_url):
|
|
||||||
cached_key = None
|
|
||||||
|
|
||||||
if not cached_key:
|
|
||||||
logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
|
|
||||||
result = await cache_single_artifact(remote_url, remote_name, path)
|
|
||||||
if result["status"] == "error":
|
|
||||||
raise HTTPException(status_code=502, detail=f"Failed to fetch: {result['error']}")
|
|
||||||
if result["status"] == "cached" and is_mutable:
|
|
||||||
cache_config = config.get_cache_config(remote_name)
|
|
||||||
mutable_ttl = cache_config.get("mutable_ttl", 3600)
|
|
||||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
|
||||||
logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
|
|
||||||
if result.get("etag") or result.get("last_modified"):
|
|
||||||
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
|
|
||||||
|
|
||||||
artifact_data = storage.download_object(storage.get_object_key(remote_name, path))
|
|
||||||
|
|
||||||
is_blob = "/blobs/" in path
|
|
||||||
if is_blob:
|
|
||||||
content_type = "application/octet-stream"
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
manifest_json = json.loads(artifact_data)
|
|
||||||
content_type = manifest_json.get("mediaType")
|
|
||||||
if not content_type:
|
|
||||||
if "manifests" in manifest_json:
|
|
||||||
content_type = "application/vnd.oci.image.index.v1+json"
|
|
||||||
else:
|
|
||||||
content_type = "application/vnd.oci.image.manifest.v1+json"
|
|
||||||
except Exception:
|
|
||||||
content_type = "application/vnd.oci.image.manifest.v1+json"
|
|
||||||
|
|
||||||
digest = f"sha256:{hashlib.sha256(artifact_data).hexdigest()}"
|
|
||||||
headers = {
|
|
||||||
"Docker-Distribution-Api-Version": "registry/2.0",
|
|
||||||
"Docker-Content-Digest": digest,
|
|
||||||
"Content-Length": str(len(artifact_data)),
|
|
||||||
}
|
|
||||||
|
|
||||||
if request.method == "HEAD":
|
|
||||||
return Response(status_code=200, headers=headers, media_type=content_type)
|
|
||||||
|
|
||||||
metrics.record_cache_hit(remote_name, len(artifact_data))
|
|
||||||
return Response(content=artifact_data, media_type=content_type, headers=headers)
|
|
||||||
|
|
||||||
|
|
||||||
async def discover_artifacts(remote: str, include_pattern: str) -> list[str]:
|
@app.get("/api/v1/remote/{remote_name}/{path:path}")
|
||||||
if "github.com" in remote:
|
async def get_artifact(request: Request, remote_name: str, path: str):
|
||||||
return await discover_github_releases(remote, include_pattern)
|
return await proxy.handle(request, remote_name, path, storage, cache, config, database, metrics)
|
||||||
else:
|
|
||||||
raise HTTPException(status_code=400, detail=f"Unsupported remote: {remote}")
|
|
||||||
|
|
||||||
|
|
||||||
async def discover_github_releases(remote: str, include_pattern: str) -> list[str]:
    """List release asset download URLs of a GitHub repository that match a pattern.

    Args:
        remote: ``github.com/<owner>/<repo>``; an ``http(s)://`` scheme and a
            ``www.`` prefix are tolerated.
        include_pattern: Regular expression in which every ``*`` is first
            replaced by ``.*``; it is searched for in each download URL.

    Returns:
        The matching ``browser_download_url`` values from the releases returned
        by a single request to the GitHub releases API.

    Raises:
        HTTPException: 400 for a malformed remote or an invalid pattern;
            the upstream status code when the GitHub API does not answer 200.
    """
    match = re.match(r"(?:https?://)?(?:www\.)?github\.com/([^/]+)/([^/]+)", remote)
    if not match:
        raise HTTPException(status_code=400, detail="Invalid GitHub remote format")
    owner, repo = match.groups()

    # Compile before any network traffic so a bad pattern is reported as a
    # client error instead of surfacing as an unhandled re.error.
    try:
        regex = re.compile(include_pattern.replace("*", ".*"))
    except re.error as exc:
        raise HTTPException(status_code=400, detail=f"Invalid include pattern: {exc}") from exc

    async with httpx.AsyncClient(follow_redirects=True) as client:
        response = await client.get(f"https://api.github.com/repos/{owner}/{repo}/releases")

    if response.status_code != 200:
        raise HTTPException(
            status_code=response.status_code,
            detail=f"Failed to fetch releases: {response.text}",
        )

    matching_urls = []
    for release in response.json():
        for asset in release.get("assets", []):
            download_url = asset["browser_download_url"]
            if regex.search(download_url):
                matching_urls.append(download_url)
    return matching_urls
|
|
||||||
|
|
||||||
|
|
||||||
@app.put("/api/v1/remote/{remote_name}/{path:path}")
|
@app.put("/api/v1/remote/{remote_name}/{path:path}")
|
||||||
async def upload_file(remote_name: str, path: str, file: UploadFile = File(...)):
|
async def upload_file(remote_name: str, path: str, file: UploadFile = File(...)):
|
||||||
"""Upload a file to local repository"""
|
return await local.upload(remote_name, path, file, storage, database, config)
|
||||||
# Check if remote is configured and is local
|
|
||||||
remote_config = config.get_remote_config(remote_name)
|
|
||||||
if not remote_config:
|
|
||||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
|
||||||
|
|
||||||
if remote_config.get("type") != "local":
|
|
||||||
raise HTTPException(status_code=400, detail="Upload only supported for local repositories")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Read file content
|
|
||||||
content = await file.read()
|
|
||||||
|
|
||||||
# Calculate SHA256
|
|
||||||
sha256_sum = hashlib.sha256(content).hexdigest()
|
|
||||||
|
|
||||||
# Check if file already exists (prevent overwrite)
|
|
||||||
if database.file_exists(remote_name, path):
|
|
||||||
raise HTTPException(status_code=409, detail="File already exists")
|
|
||||||
|
|
||||||
# Generate S3 key
|
|
||||||
s3_key = f"local/{remote_name}/{path}"
|
|
||||||
|
|
||||||
# Determine content type
|
|
||||||
content_type = file.content_type or "application/octet-stream"
|
|
||||||
|
|
||||||
# Upload to S3
|
|
||||||
try:
|
|
||||||
storage.upload(s3_key, content)
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=f"Upload failed: {e}")
|
|
||||||
|
|
||||||
# Add to database
|
|
||||||
success = database.add_local_file(
|
|
||||||
repository_name=remote_name,
|
|
||||||
file_path=path,
|
|
||||||
s3_key=s3_key,
|
|
||||||
size_bytes=len(content),
|
|
||||||
sha256_sum=sha256_sum,
|
|
||||||
content_type=content_type,
|
|
||||||
)
|
|
||||||
|
|
||||||
if not success:
|
|
||||||
# Clean up S3 if database insert failed
|
|
||||||
storage.delete_object(s3_key)
|
|
||||||
raise HTTPException(status_code=500, detail="Failed to save file metadata")
|
|
||||||
|
|
||||||
return JSONResponse(
|
|
||||||
{
|
|
||||||
"message": "File uploaded successfully",
|
|
||||||
"file_path": path,
|
|
||||||
"size_bytes": len(content),
|
|
||||||
"sha256_sum": sha256_sum,
|
|
||||||
"content_type": content_type,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
@app.head("/api/v1/remote/{remote_name}/{path:path}")
|
@app.head("/api/v1/remote/{remote_name}/{path:path}")
|
||||||
def check_file_exists(remote_name: str, path: str):
|
def check_file_exists(remote_name: str, path: str):
|
||||||
"""Check if file exists (for CI jobs) - supports local repositories only"""
|
return local.check_exists(remote_name, path, database, config)
|
||||||
# Check if remote is configured
|
|
||||||
remote_config = config.get_remote_config(remote_name)
|
|
||||||
if not remote_config:
|
|
||||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
|
||||||
|
|
||||||
# Handle local repository
|
|
||||||
if remote_config.get("type") == "local":
|
|
||||||
try:
|
|
||||||
metadata = database.get_local_file_metadata(remote_name, path)
|
|
||||||
if not metadata:
|
|
||||||
raise HTTPException(status_code=404, detail="File not found")
|
|
||||||
|
|
||||||
return Response(
|
|
||||||
headers={
|
|
||||||
"Content-Length": str(metadata["size_bytes"]),
|
|
||||||
"Content-Type": metadata.get("content_type", "application/octet-stream"),
|
|
||||||
"X-SHA256": metadata["sha256_sum"],
|
|
||||||
"X-Created-At": metadata["created_at"].isoformat() if metadata["created_at"] else "",
|
|
||||||
"X-Uploaded-At": metadata["uploaded_at"].isoformat() if metadata["uploaded_at"] else "",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")
|
|
||||||
else:
|
|
||||||
# For remote repositories, just return 405 Method Not Allowed
|
|
||||||
raise HTTPException(status_code=405, detail="HEAD method only supported for local repositories")
|
|
||||||
|
|
||||||
|
|
||||||
@app.delete("/api/v1/remote/{remote_name}/{path:path}")
|
@app.delete("/api/v1/remote/{remote_name}/{path:path}")
|
||||||
def delete_file(remote_name: str, path: str):
|
def delete_file(remote_name: str, path: str):
|
||||||
"""Delete a file from local repository"""
|
return local.delete(remote_name, path, storage, database, config)
|
||||||
# Check if remote is configured and is local
|
|
||||||
remote_config = config.get_remote_config(remote_name)
|
|
||||||
if not remote_config:
|
|
||||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
|
||||||
|
|
||||||
if remote_config.get("type") != "local":
|
|
||||||
raise HTTPException(status_code=400, detail="Delete only supported for local repositories")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Get S3 key before deleting from database
|
|
||||||
s3_key = database.delete_local_file(remote_name, path)
|
|
||||||
if not s3_key:
|
|
||||||
raise HTTPException(status_code=404, detail="File not found")
|
|
||||||
|
|
||||||
# Delete from S3
|
|
||||||
if not storage.delete_object(s3_key):
|
|
||||||
# File was deleted from database but not from S3 - log warning but continue
|
|
||||||
print(f"Warning: Failed to delete S3 object {s3_key}")
|
|
||||||
|
|
||||||
return JSONResponse({"message": "File deleted successfully"})
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=f"Delete failed: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/v1/artifacts/cache")
|
@app.post("/api/v1/artifacts/cache")
|
||||||
async def cache_artifact(request: ArtifactRequest) -> dict[str, Any]:
|
async def cache_artifact(request: ArtifactRequest):
|
||||||
try:
|
return await discovery.cache_artifacts(request.remote, request.include_pattern, storage)
|
||||||
matching_urls = await discover_artifacts(request.remote, request.include_pattern)
|
|
||||||
|
|
||||||
if not matching_urls:
|
|
||||||
return {
|
|
||||||
"message": "No matching artifacts found",
|
|
||||||
"cached_count": 0,
|
|
||||||
"artifacts": [],
|
|
||||||
}
|
|
||||||
|
|
||||||
cached_artifacts = []
|
|
||||||
|
|
||||||
for url in matching_urls:
|
|
||||||
result = await cache_single_artifact(url, "", "")
|
|
||||||
cached_artifacts.append(result)
|
|
||||||
|
|
||||||
cached_count = sum(1 for artifact in cached_artifacts if artifact["status"] in ["cached", "already_cached"])
|
|
||||||
|
|
||||||
return {
|
|
||||||
"message": f"Processed {len(matching_urls)} artifacts, {cached_count} successfully cached",
|
|
||||||
"cached_count": cached_count,
|
|
||||||
"artifacts": cached_artifacts,
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/v1/artifacts/{remote:path}")
|
@app.get("/api/v1/artifacts/{remote:path}")
|
||||||
async def list_cached_artifacts(remote: str, include_pattern: str = ".*") -> dict[str, Any]:
|
async def list_cached_artifacts(remote: str, include_pattern: str = ".*"):
|
||||||
try:
|
return await discovery.list_artifacts(remote, include_pattern, storage)
|
||||||
matching_urls = await discover_artifacts(remote, include_pattern)
|
|
||||||
|
|
||||||
cached_artifacts = []
|
|
||||||
for url in matching_urls:
|
|
||||||
# Extract path from URL for hierarchical key generation
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
parsed = urlparse(url)
|
|
||||||
path = parsed.path
|
|
||||||
key = storage.get_object_key(remote, path)
|
|
||||||
if storage.exists(key):
|
|
||||||
cached_artifacts.append({"url": url, "cached_url": storage.get_url(key), "key": key})
|
|
||||||
|
|
||||||
return {
|
|
||||||
"remote": remote,
|
|
||||||
"pattern": include_pattern,
|
|
||||||
"total_found": len(matching_urls),
|
|
||||||
"cached_count": len(cached_artifacts),
|
|
||||||
"artifacts": cached_artifacts,
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/metrics")
def get_metrics(
    json: bool | None = Query(False, description="Return JSON format instead of Prometheus"),
):
    """Serve storage metrics as Prometheus text (default) or as JSON (``?json=true``)."""
    config._check_reload()

    # Collected on both paths: the JSON response returns this value directly,
    # and the Prometheus path needs the call to update the gauges before export.
    snapshot = metrics.get_metrics(storage, config)
    if json:
        return snapshot

    exposition = generate_latest().decode("utf-8")
    return PlainTextResponse(exposition, media_type=CONTENT_TYPE_LATEST)
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/config")
def get_config():
    """Return the configuration currently held by the config manager."""
    loaded = config.config
    return loaded
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
from . import generic, helm, npm, python, rpm
|
||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
__all__ = ["generic", "helm", "npm", "python", "rpm", "get_content_type"]
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
# Suffix groups mapped to the Content-Type served for them. The suffixes are
# mutually exclusive, so the order of the entries does not matter.
_CONTENT_TYPES_BY_SUFFIX = (
    ((".tar.gz", ".tgz", ".xml.gz"), "application/gzip"),
    # bzip2 and xz payloads are not gzip; label them with their own types.
    ((".xml.bz2",), "application/x-bzip2"),
    ((".xml.xz",), "application/x-xz"),
    ((".zip", ".whl"), "application/zip"),
    ((".exe",), "application/x-msdownload"),
    ((".rpm",), "application/x-rpm"),
    ((".xml",), "application/xml"),
    ((".yaml", ".yml"), "text/yaml"),
)


def get_content_type(filename: str) -> str:
    """Guess the Content-Type of an artifact from its file name suffix.

    Matching is case-sensitive and looks only at the end of ``filename``.
    Names with no recognised suffix (including the empty string) map to
    ``application/octet-stream``.
    """
    for suffixes, content_type in _CONTENT_TYPES_BY_SUFFIX:
        if filename.endswith(suffixes):
            return content_type
    return "application/octet-stream"
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
__all__ = ["get_content_type"]
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Prepare a Helm repository response body and pick its Content-Type.

    For ``index.yaml`` every occurrence of the upstream ``base_url`` is
    rewritten to ``{proxy_url}/api/v1/remote/{remote_name}`` and the result is
    served as ``text/yaml``. Any other file is returned unchanged with a
    Content-Type derived from ``filename``.

    Args:
        data: Raw upstream response body.
        path: Request path (not used by this handler).
        filename: Final path component of the request.
        base_url: Upstream base URL; a trailing slash is ignored.
        proxy_url: Externally visible URL of this proxy.
        remote_name: Name of the remote the request was made against.

    Returns:
        ``(body, content_type)``.
    """
    if filename == "index.yaml":
        # Strip the trailing slash so the "/" separating host and path is
        # kept in the rewritten URL.
        upstream = base_url.rstrip("/").encode()
        # bytes.replace with an empty pattern would insert the replacement
        # between every byte, so skip rewriting when there is no base URL.
        if upstream:
            data = data.replace(
                upstream,
                f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
            )
        return data, "text/yaml"
    return data, get_content_type(filename)
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    immutable_patterns: list[str],
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Prepare an npm registry response body and pick its Content-Type.

    When ``path`` matches none of ``immutable_patterns`` the body is treated
    as registry metadata: every occurrence of the upstream ``base_url`` is
    rewritten to ``{proxy_url}/api/v1/remote/{remote_name}`` and the result is
    served as ``application/json``. Bodies for immutable paths are returned
    unchanged with a Content-Type derived from ``filename``.

    Args:
        data: Raw upstream response body.
        path: Request path, tested against ``immutable_patterns`` with
            ``re.search``.
        filename: Final path component of the request.
        immutable_patterns: Regular expressions identifying immutable files.
        base_url: Upstream base URL; a trailing slash is ignored.
        proxy_url: Externally visible URL of this proxy.
        remote_name: Name of the remote the request was made against.

    Returns:
        ``(body, content_type)``.
    """
    if not any(re.search(p, path) for p in immutable_patterns):
        # Strip the trailing slash so the "/" separating host and path is
        # kept in the rewritten URL.
        upstream = base_url.rstrip("/").encode()
        # bytes.replace with an empty pattern would insert the replacement
        # between every byte, so skip rewriting when there is no base URL.
        if upstream:
            data = data.replace(
                upstream,
                f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
            )
        return data, "application/json"
    return data, get_content_type(filename)
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
|
||||||
|
def construct_url(base_url: str, path: str) -> str:
    """Build the upstream URL for a PyPI request.

    PyPI splits simple/ index pages (pypi.org) from file downloads
    (files.pythonhosted.org), so simple/ requests are redirected to pypi.org.

    A trailing slash on ``base_url`` is ignored both for that comparison and
    when joining, so the result never contains a double slash after the host.
    """
    upstream = base_url.rstrip("/")
    if upstream == "https://files.pythonhosted.org" and "simple/" in path:
        return f"https://pypi.org/{path}"
    return f"{upstream}/{path}"
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    immutable_patterns: list[str],
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Prepare a PyPI response body and pick its Content-Type.

    When ``path`` matches none of ``immutable_patterns`` the body is treated
    as an index page: every occurrence of the upstream ``base_url`` is
    rewritten to ``{proxy_url}/api/v1/remote/{remote_name}`` and the result is
    served as ``text/html; charset=utf-8``. Bodies for immutable paths are
    returned unchanged with a Content-Type derived from ``filename``.

    Args:
        data: Raw upstream response body.
        path: Request path, tested against ``immutable_patterns`` with
            ``re.search``.
        filename: Final path component of the request.
        immutable_patterns: Regular expressions identifying immutable files.
        base_url: Upstream base URL; a trailing slash is ignored.
        proxy_url: Externally visible URL of this proxy.
        remote_name: Name of the remote the request was made against.

    Returns:
        ``(body, content_type)``.
    """
    if not any(re.search(p, path) for p in immutable_patterns):
        # Strip the trailing slash so the "/" separating host and path is
        # kept in the rewritten link.
        upstream = base_url.rstrip("/").encode()
        # bytes.replace with an empty pattern would insert the replacement
        # between every byte, so skip rewriting when there is no base URL.
        if upstream:
            data = data.replace(
                upstream,
                f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
            )
        return data, "text/html; charset=utf-8"
    return data, get_content_type(filename)
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
__all__ = ["get_content_type"]
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .s3 import S3Storage
|
||||||
|
|
||||||
|
__all__ = ["S3Storage"]
|
||||||
@@ -41,7 +41,6 @@ class S3Storage:
|
|||||||
|
|
||||||
self.client = boto3.client("s3", **client_kwargs)
|
self.client = boto3.client("s3", **client_kwargs)
|
||||||
|
|
||||||
# Try to ensure bucket exists, but don't fail if MinIO isn't ready yet
|
|
||||||
try:
|
try:
|
||||||
self._ensure_bucket_exists()
|
self._ensure_bucket_exists()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -55,25 +54,21 @@ class S3Storage:
|
|||||||
self.client.create_bucket(Bucket=self.bucket)
|
self.client.create_bucket(Bucket=self.bucket)
|
||||||
|
|
||||||
def get_object_key(self, remote_name: str, path: str) -> str:
    """Map a remote name and request path to a storage object key.

    Leading slashes on ``path`` are ignored. The key takes one of three forms:

    * ``<remote>/blobs/sha256/<digest>`` when the path contains exactly one
      ``/blobs/sha256:`` marker (Docker blobs, keyed by digest so the same
      blob is shared across images);
    * ``<remote>/<hash>/<filename>`` when the path has a directory part,
      where ``<hash>`` is the first 16 hex characters of the SHA-256 of that
      directory part;
    * ``<remote>/<filename>`` otherwise.
    """
    relative = path.lstrip("/")

    # Docker blobs are keyed by digest for deduplication across images.
    pieces = relative.split("/blobs/sha256:")
    if len(pieces) == 2:
        return f"{remote_name}/blobs/sha256/{pieces[1]}"

    folder, name = os.path.split(relative)
    if not folder:
        return f"{remote_name}/{name}"

    folder_hash = hashlib.sha256(folder.encode()).hexdigest()[:16]
    return f"{remote_name}/{folder_hash}/{name}"
|
||||||
|
|
||||||
def exists(self, key: str) -> bool:
|
def exists(self, key: str) -> bool:
|
||||||
+23
-15
@@ -73,30 +73,20 @@ TEST_REMOTES = {
|
|||||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 0},
|
"cache": {"immutable_ttl": 0, "mutable_ttl": 0},
|
||||||
},
|
},
|
||||||
"pypi-test": {
|
"pypi-test": {
|
||||||
"base_url": "https://pypi.org",
|
|
||||||
"type": "remote",
|
|
||||||
"package": "pypi",
|
|
||||||
"pypi_files_url": "https://files.pythonhosted.org",
|
|
||||||
"pypi_files_remote": "pypi-files-test",
|
|
||||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 600},
|
|
||||||
},
|
|
||||||
"pypi-files-test": {
|
|
||||||
"base_url": "https://files.pythonhosted.org",
|
"base_url": "https://files.pythonhosted.org",
|
||||||
"type": "remote",
|
"type": "remote",
|
||||||
"package": "generic",
|
"package": "pypi",
|
||||||
"immutable_patterns": [
|
"immutable_patterns": [
|
||||||
"packages/.*\\.whl$",
|
r"packages/.*\.whl$",
|
||||||
"packages/.*\\.whl\\.metadata$",
|
r"packages/.*\.whl\.metadata$",
|
||||||
"packages/.*\\.tar\\.gz$",
|
r"packages/.*\.tar\.gz$",
|
||||||
],
|
],
|
||||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 0},
|
"cache": {"immutable_ttl": 0, "mutable_ttl": 600},
|
||||||
},
|
},
|
||||||
"npm-test": {
|
"npm-test": {
|
||||||
"base_url": "https://registry.npmjs.org",
|
"base_url": "https://registry.npmjs.org",
|
||||||
"type": "remote",
|
"type": "remote",
|
||||||
"package": "npm",
|
"package": "npm",
|
||||||
"npm_files_url": "https://registry.npmjs.org",
|
|
||||||
"npm_files_remote": "npm-test",
|
|
||||||
"immutable_patterns": [r"\.tgz$"],
|
"immutable_patterns": [r"\.tgz$"],
|
||||||
"mutable_patterns": [r"^(?!.*\.tgz$).*"],
|
"mutable_patterns": [r"^(?!.*\.tgz$).*"],
|
||||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 600},
|
"cache": {"immutable_ttl": 0, "mutable_ttl": 600},
|
||||||
@@ -108,6 +98,24 @@ TEST_REMOTES = {
|
|||||||
"immutable_patterns": [r"\.tgz$"],
|
"immutable_patterns": [r"\.tgz$"],
|
||||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
|
"cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
|
||||||
},
|
},
|
||||||
|
"quarantine-test": {
|
||||||
|
"base_url": "https://releases.example.com",
|
||||||
|
"type": "remote",
|
||||||
|
"package": "generic",
|
||||||
|
"immutable_patterns": [r".*\.tar\.gz$"],
|
||||||
|
"quarantine_new": True,
|
||||||
|
"quarantine_days": 3,
|
||||||
|
"cache": {"immutable_ttl": 0, "mutable_ttl": 0},
|
||||||
|
},
|
||||||
|
"quarantine-disabled": {
|
||||||
|
"base_url": "https://releases.example.com",
|
||||||
|
"type": "remote",
|
||||||
|
"package": "generic",
|
||||||
|
"immutable_patterns": [r".*\.tar\.gz$"],
|
||||||
|
"quarantine_new": False,
|
||||||
|
"quarantine_days": 3,
|
||||||
|
"cache": {"immutable_ttl": 0, "mutable_ttl": 0},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -283,3 +283,47 @@ class TestMutableMeta:
|
|||||||
|
|
||||||
def test_delete_no_op_when_unavailable(self, unavailable_cache):
|
def test_delete_no_op_when_unavailable(self, unavailable_cache):
|
||||||
unavailable_cache.delete_mutable_meta("remote", "path") # must not raise
|
unavailable_cache.delete_mutable_meta("remote", "path") # must not raise
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# artifact published date (quarantine support)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestArtifactPublished:
    """Key derivation and store/get behaviour for artifact published dates."""

    def test_key_format_is_deterministic(self, bare_cache):
        """The key is ``pkg:published:<remote>:<first 16 hex of sha256(path)>``."""
        artifact_path = "some/path/package-1.0.tar.gz"
        digest_prefix = hashlib.sha256(artifact_path.encode()).hexdigest()[:16]
        key = bare_cache.get_artifact_published_key("myremote", artifact_path)
        assert key == f"pkg:published:myremote:{digest_prefix}"

    def test_key_hash_is_16_chars(self, bare_cache):
        """The last colon-separated segment of the key is 16 characters long."""
        key = bare_cache.get_artifact_published_key("remote", "path/to/file.whl")
        hash_segment = key.split(":")[-1]
        assert len(hash_segment) == 16

    def test_different_paths_produce_different_keys(self, bare_cache):
        """Two different paths on the same remote do not share a key."""
        first = bare_cache.get_artifact_published_key("remote", "pkg-1.0.tar.gz")
        second = bare_cache.get_artifact_published_key("remote", "pkg-2.0.tar.gz")
        assert first != second

    def test_store_calls_set_with_correct_value(self, cache_with_redis, mock_redis_client):
        """Storing issues exactly one ``set(key, value)`` on the client."""
        last_modified = "Mon, 01 Jan 2024 00:00:00 GMT"
        cache_with_redis.store_artifact_published("remote", "path/pkg.tar.gz", last_modified)
        key = cache_with_redis.get_artifact_published_key("remote", "path/pkg.tar.gz")
        mock_redis_client.set.assert_called_once_with(key, last_modified)

    def test_get_returns_stored_value(self, cache_with_redis, mock_redis_client):
        """Whatever the client returns from ``get`` is handed back unchanged."""
        last_modified = "Tue, 15 Mar 2022 12:00:00 GMT"
        mock_redis_client.get.return_value = last_modified
        fetched = cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz")
        assert fetched == last_modified

    def test_get_returns_none_when_not_stored(self, cache_with_redis, mock_redis_client):
        """A missing entry is reported as ``None``."""
        mock_redis_client.get.return_value = None
        fetched = cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz")
        assert fetched is None

    def test_store_no_op_when_unavailable(self, unavailable_cache):
        """Storing with an unavailable cache must not raise."""
        unavailable_cache.store_artifact_published("remote", "path", "Mon, 01 Jan 2024 00:00:00 GMT")

    def test_get_returns_none_when_unavailable(self, unavailable_cache):
        """Reading from an unavailable cache yields ``None``."""
        fetched = unavailable_cache.get_artifact_published("remote", "path")
        assert fetched is None
|
||||||
|
|||||||
@@ -351,3 +351,190 @@ class TestConfigReload:
|
|||||||
cfg._check_reload()
|
cfg._check_reload()
|
||||||
|
|
||||||
assert "repo-a" in cfg.config["remotes"]
|
assert "repo-a" in cfg.config["remotes"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_quarantine_config
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetQuarantineConfig:
    """``get_quarantine_config`` yields an ``(enabled, days)`` pair for a remote."""

    @staticmethod
    def _generic_remote(**extra):
        """Build a generic remote definition, optionally with quarantine keys."""
        return {"type": "remote", "package": "generic", "base_url": "https://x.com", **extra}

    def test_returns_false_zero_when_not_configured(self, make_config):
        """A remote without quarantine keys is disabled with zero days."""
        cfg = make_config({"r": self._generic_remote()})
        enabled, days = cfg.get_quarantine_config("r")
        assert enabled is False
        assert days == 0

    def test_returns_false_zero_for_missing_remote(self, make_config):
        """An unknown remote is disabled with zero days."""
        cfg = make_config({})
        enabled, days = cfg.get_quarantine_config("nonexistent")
        assert enabled is False
        assert days == 0

    def test_enabled_true_and_days_returned(self, make_config):
        """Both configured values are returned when quarantine is on."""
        remote = self._generic_remote(quarantine_new=True, quarantine_days=7)
        cfg = make_config({"r": remote})
        enabled, days = cfg.get_quarantine_config("r")
        assert enabled is True
        assert days == 7

    def test_quarantine_new_false_returns_disabled(self, make_config):
        """``quarantine_new: False`` disables quarantine but days are still reported."""
        remote = self._generic_remote(quarantine_new=False, quarantine_days=7)
        cfg = make_config({"r": remote})
        enabled, days = cfg.get_quarantine_config("r")
        assert enabled is False
        assert days == 7

    def test_enabled_with_zero_days_returns_zero(self, make_config):
        """Quarantine may be enabled with a zero-day window."""
        remote = self._generic_remote(quarantine_new=True, quarantine_days=0)
        cfg = make_config({"r": remote})
        enabled, days = cfg.get_quarantine_config("r")
        assert enabled is True
        assert days == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Directory mode (CONFIG_PATH points to a directory)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _remote(base_url: str = "https://x.com") -> dict:
    """Build a minimal generic remote definition pointing at ``base_url``."""
    return dict(type="remote", package="generic", base_url=base_url)
|
||||||
|
|
||||||
|
|
||||||
|
class TestConfigDirMode:
    """ConfigManager given a directory path loads remotes from the YAML files in it."""

    @staticmethod
    def _write_remotes(target, remotes):
        """Write ``{"remotes": remotes}`` as YAML to ``target``."""
        target.write_text(yaml.dump({"remotes": remotes}))

    def test_loads_all_yaml_files(self, tmp_path):
        """Remotes from every YAML file in the directory are present."""
        self._write_remotes(tmp_path / "a.yaml", {"repo-a": _remote()})
        self._write_remotes(tmp_path / "b.yaml", {"repo-b": _remote("https://y.com")})
        manager = ConfigManager(str(tmp_path))
        assert "repo-a" in manager.config["remotes"]
        assert "repo-b" in manager.config["remotes"]

    def test_later_file_overrides_earlier_on_same_key(self, tmp_path):
        """When a.yaml and b.yaml define the same remote, b.yaml's value wins."""
        self._write_remotes(tmp_path / "a.yaml", {"r": _remote("https://first.com")})
        self._write_remotes(tmp_path / "b.yaml", {"r": _remote("https://second.com")})
        manager = ConfigManager(str(tmp_path))
        assert manager.config["remotes"]["r"]["base_url"] == "https://second.com"

    def test_empty_directory_returns_empty_remotes(self, tmp_path):
        """An empty directory produces a config with an empty remotes mapping."""
        manager = ConfigManager(str(tmp_path))
        assert manager.config == {"remotes": {}}

    def test_ignores_non_yaml_files(self, tmp_path):
        """A .txt file next to the YAML file contributes nothing."""
        (tmp_path / "notes.txt").write_text("not yaml")
        self._write_remotes(tmp_path / "a.yaml", {"repo-a": _remote()})
        manager = ConfigManager(str(tmp_path))
        assert list(manager.config["remotes"].keys()) == ["repo-a"]

    def test_reload_picks_up_new_file(self, tmp_path):
        """A file added after construction is loaded by ``_check_reload``."""
        self._write_remotes(tmp_path / "a.yaml", {"repo-a": _remote()})
        manager = ConfigManager(str(tmp_path))
        assert "repo-a" in manager.config["remotes"]
        assert "repo-b" not in manager.config["remotes"]

        added = tmp_path / "b.yaml"
        self._write_remotes(added, {"repo-b": _remote("https://y.com")})
        # Push the new file's mtime past the manager's recorded timestamp so
        # the change is detected regardless of filesystem time resolution.
        newer = manager._last_modified + 1
        os.utime(str(added), (newer, newer))

        manager._check_reload()

        assert "repo-a" in manager.config["remotes"]
        assert "repo-b" in manager.config["remotes"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# config_dir key (main file contains a config_dir pointer)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestConfigDirKey:
    """A ``config_dir`` key in the main file adds remotes from that directory."""

    @staticmethod
    def _make_conf_d(tmp_path):
        """Create and return ``tmp_path / "conf.d"``."""
        conf_d = tmp_path / "conf.d"
        conf_d.mkdir()
        return conf_d

    @staticmethod
    def _write_remotes(target, remotes):
        """Write ``{"remotes": remotes}`` as YAML to ``target``."""
        target.write_text(yaml.dump({"remotes": remotes}))

    @staticmethod
    def _load_main(tmp_path, config_dir, remotes):
        """Write ``config.yaml`` with the given ``config_dir`` and remotes, then load it."""
        main = tmp_path / "config.yaml"
        main.write_text(yaml.dump({"config_dir": config_dir, "remotes": remotes}))
        return ConfigManager(str(main))

    def test_merges_remotes_from_config_dir(self, tmp_path):
        """Remotes from the main file and from the directory are both present."""
        conf_d = self._make_conf_d(tmp_path)
        self._write_remotes(conf_d / "remotes.yaml", {"repo-extra": _remote("https://extra.com")})
        manager = self._load_main(tmp_path, str(conf_d), {"repo-main": _remote()})
        assert "repo-main" in manager.config["remotes"]
        assert "repo-extra" in manager.config["remotes"]

    def test_relative_config_dir_resolved_from_main_file(self, tmp_path):
        """A relative ``config_dir`` is looked up next to the main file."""
        conf_d = self._make_conf_d(tmp_path)
        self._write_remotes(conf_d / "r.yaml", {"repo-a": _remote()})
        manager = self._load_main(tmp_path, "conf.d", {})
        assert "repo-a" in manager.config["remotes"]

    def test_config_dir_key_not_present_in_loaded_config(self, tmp_path):
        """The ``config_dir`` pointer itself does not appear in the loaded config."""
        conf_d = self._make_conf_d(tmp_path)
        manager = self._load_main(tmp_path, str(conf_d), {})
        assert "config_dir" not in manager.config

    def test_dir_remote_overrides_main_file_remote(self, tmp_path):
        """A remote defined in the directory replaces the main file's definition."""
        conf_d = self._make_conf_d(tmp_path)
        self._write_remotes(conf_d / "override.yaml", {"r": _remote("https://new.com")})
        manager = self._load_main(tmp_path, str(conf_d), {"r": _remote("https://old.com")})
        assert manager.config["remotes"]["r"]["base_url"] == "https://new.com"

    def test_empty_config_dir_uses_main_file_only(self, tmp_path):
        """An empty directory leaves only the main file's remotes."""
        conf_d = self._make_conf_d(tmp_path)
        manager = self._load_main(tmp_path, str(conf_d), {"repo-main": _remote()})
        assert list(manager.config["remotes"].keys()) == ["repo-main"]

    def test_reload_picks_up_changed_dir_file(self, tmp_path):
        """Rewriting a file in the directory is picked up by ``_check_reload``."""
        conf_d = self._make_conf_d(tmp_path)
        changing = conf_d / "r.yaml"
        self._write_remotes(changing, {"repo-v1": _remote()})
        manager = self._load_main(tmp_path, str(conf_d), {})
        assert "repo-v1" in manager.config["remotes"]

        self._write_remotes(changing, {"repo-v2": _remote("https://v2.com")})
        # Push the file's mtime past the manager's recorded timestamp so the
        # change is detected regardless of filesystem time resolution.
        newer = manager._last_modified + 1
        os.utime(str(changing), (newer, newer))

        manager._check_reload()

        assert "repo-v2" in manager.config["remotes"]
        assert "repo-v1" not in manager.config["remotes"]
|
||||||
|
|||||||
+178
-27
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
|
from datetime import UTC
|
||||||
from unittest.mock import ANY, AsyncMock, MagicMock, patch
|
from unittest.mock import ANY, AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -204,7 +205,7 @@ class TestDockerProxy:
|
|||||||
deps["cache"].is_mutable_file.return_value = True
|
deps["cache"].is_mutable_file.return_value = True
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
return_value={"status": "cached"},
|
return_value={"status": "cached"},
|
||||||
) as mock_fetch:
|
) as mock_fetch:
|
||||||
@@ -226,7 +227,7 @@ class TestDockerProxy:
|
|||||||
deps["cache"].is_mutable_file.return_value = True
|
deps["cache"].is_mutable_file.return_value = True
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
return_value={"status": "cached"},
|
return_value={"status": "cached"},
|
||||||
):
|
):
|
||||||
@@ -248,9 +249,9 @@ class TestDockerProxy:
|
|||||||
deps["cache"].is_index_valid.return_value = False # but TTL expired
|
deps["cache"].is_index_valid.return_value = False # but TTL expired
|
||||||
deps["storage"].download_object.return_value = manifest
|
deps["storage"].download_object.return_value = manifest
|
||||||
|
|
||||||
with patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=True):
|
with patch("artifactapi.artifact.proxy._upstream_reachable", new_callable=AsyncMock, return_value=True):
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
return_value={"status": "cached"},
|
return_value={"status": "cached"},
|
||||||
) as mock_fetch:
|
) as mock_fetch:
|
||||||
@@ -352,7 +353,7 @@ class TestGenericArtifactRoute:
|
|||||||
deps["cache"].is_mutable_file.return_value = False
|
deps["cache"].is_mutable_file.return_value = False
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
return_value={"status": "cached"},
|
return_value={"status": "cached"},
|
||||||
) as mock_fetch:
|
) as mock_fetch:
|
||||||
@@ -369,7 +370,7 @@ class TestGenericArtifactRoute:
|
|||||||
deps["cache"].is_mutable_file.return_value = False
|
deps["cache"].is_mutable_file.return_value = False
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
return_value={"status": "cached"},
|
return_value={"status": "cached"},
|
||||||
):
|
):
|
||||||
@@ -384,7 +385,7 @@ class TestGenericArtifactRoute:
|
|||||||
deps["cache"].is_mutable_file.return_value = True
|
deps["cache"].is_mutable_file.return_value = True
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
return_value={"status": "cached"},
|
return_value={"status": "cached"},
|
||||||
):
|
):
|
||||||
@@ -399,7 +400,7 @@ class TestGenericArtifactRoute:
|
|||||||
deps["cache"].is_mutable_file.return_value = False
|
deps["cache"].is_mutable_file.return_value = False
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
return_value={"status": "error", "error": "upstream unreachable"},
|
return_value={"status": "error", "error": "upstream unreachable"},
|
||||||
):
|
):
|
||||||
@@ -430,7 +431,7 @@ class TestGenericArtifactRoute:
|
|||||||
deps["cache"].is_index_valid.return_value = False
|
deps["cache"].is_index_valid.return_value = False
|
||||||
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
||||||
|
|
||||||
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=False):
|
with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock, return_value=False):
|
||||||
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
||||||
|
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
@@ -446,8 +447,8 @@ class TestGenericArtifactRoute:
|
|||||||
deps["cache"].is_index_valid.return_value = False
|
deps["cache"].is_index_valid.return_value = False
|
||||||
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
||||||
|
|
||||||
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=True):
|
with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock, return_value=True):
|
||||||
with patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
with patch("artifactapi.artifact.proxy.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
||||||
mock_cache.return_value = {"status": "error", "error": "upstream gone"}
|
mock_cache.return_value = {"status": "error", "error": "upstream gone"}
|
||||||
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
||||||
|
|
||||||
@@ -462,8 +463,8 @@ class TestGenericArtifactRoute:
|
|||||||
deps["cache"].is_index_valid.return_value = False
|
deps["cache"].is_index_valid.return_value = False
|
||||||
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
||||||
|
|
||||||
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=True):
|
with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock, return_value=True):
|
||||||
with patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
with patch("artifactapi.artifact.proxy.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
||||||
mock_cache.return_value = {"status": "cached", "etag": '"def"', "last_modified": None}
|
mock_cache.return_value = {"status": "cached", "etag": '"def"', "last_modified": None}
|
||||||
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
||||||
|
|
||||||
@@ -472,7 +473,7 @@ class TestGenericArtifactRoute:
|
|||||||
|
|
||||||
def test_mutable_backend_unreachable_on_check_updates_keeps_stale(self, client, patched_deps):
|
def test_mutable_backend_unreachable_on_check_updates_keeps_stale(self, client, patched_deps):
|
||||||
"""When check_mutable_updates=True and backend is unreachable, stale copy is kept and TTL refreshed."""
|
"""When check_mutable_updates=True and backend is unreachable, stale copy is kept and TTL refreshed."""
|
||||||
from artifactapi.main import UpstreamUnreachable
|
from artifactapi.artifact.proxy import UpstreamUnreachable
|
||||||
|
|
||||||
deps = patched_deps
|
deps = patched_deps
|
||||||
deps["storage"].exists.return_value = True
|
deps["storage"].exists.return_value = True
|
||||||
@@ -481,7 +482,7 @@ class TestGenericArtifactRoute:
|
|||||||
deps["cache"].is_index_valid.return_value = False
|
deps["cache"].is_index_valid.return_value = False
|
||||||
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
||||||
|
|
||||||
with patch("artifactapi.main.check_upstream_changed", side_effect=UpstreamUnreachable("connection refused")):
|
with patch("artifactapi.artifact.proxy.check_upstream_changed", side_effect=UpstreamUnreachable("connection refused")):
|
||||||
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
||||||
|
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
@@ -496,7 +497,7 @@ class TestGenericArtifactRoute:
|
|||||||
deps["cache"].is_mutable_file.return_value = True
|
deps["cache"].is_mutable_file.return_value = True
|
||||||
deps["cache"].is_index_valid.return_value = False
|
deps["cache"].is_index_valid.return_value = False
|
||||||
|
|
||||||
with patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=False):
|
with patch("artifactapi.artifact.proxy._upstream_reachable", new_callable=AsyncMock, return_value=False):
|
||||||
response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz")
|
response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz")
|
||||||
|
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
@@ -510,8 +511,8 @@ class TestGenericArtifactRoute:
|
|||||||
deps["cache"].is_mutable_file.return_value = True
|
deps["cache"].is_mutable_file.return_value = True
|
||||||
deps["cache"].is_index_valid.return_value = False
|
deps["cache"].is_index_valid.return_value = False
|
||||||
|
|
||||||
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock) as mock_check:
|
with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock) as mock_check:
|
||||||
with patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
with patch("artifactapi.artifact.proxy.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
||||||
mock_cache.return_value = {"status": "error", "error": "upstream gone"}
|
mock_cache.return_value = {"status": "error", "error": "upstream gone"}
|
||||||
client.get("/api/v1/remote/custom-index-test/metadata.json")
|
client.get("/api/v1/remote/custom-index-test/metadata.json")
|
||||||
|
|
||||||
@@ -685,7 +686,7 @@ class TestPyPIRemote:
|
|||||||
response = client.get("/api/v1/remote/pypi-test/simple/requests/")
|
response = client.get("/api/v1/remote/pypi-test/simple/requests/")
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert b"files.pythonhosted.org" not in response.content
|
assert b"files.pythonhosted.org" not in response.content
|
||||||
assert b"/api/v1/remote/pypi-files-test/packages/requests-2.31.0.tar.gz" in response.content
|
assert b"/api/v1/remote/pypi-test/packages/requests-2.31.0.tar.gz" in response.content
|
||||||
|
|
||||||
def test_simple_index_content_type_is_html(self, client, patched_deps):
|
def test_simple_index_content_type_is_html(self, client, patched_deps):
|
||||||
deps = patched_deps
|
deps = patched_deps
|
||||||
@@ -706,7 +707,7 @@ class TestPyPIRemote:
|
|||||||
deps["cache"].is_mutable_file.return_value = True
|
deps["cache"].is_mutable_file.return_value = True
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
return_value={"status": "cached"},
|
return_value={"status": "cached"},
|
||||||
) as mock_fetch:
|
) as mock_fetch:
|
||||||
@@ -722,7 +723,7 @@ class TestPyPIRemote:
|
|||||||
deps["storage"].download_object.return_value = b"PK wheel bytes"
|
deps["storage"].download_object.return_value = b"PK wheel bytes"
|
||||||
deps["cache"].is_mutable_file.return_value = False
|
deps["cache"].is_mutable_file.return_value = False
|
||||||
|
|
||||||
response = client.get("/api/v1/remote/pypi-files-test/packages/requests-2.31.0-py3-none-any.whl")
|
response = client.get("/api/v1/remote/pypi-test/packages/requests-2.31.0-py3-none-any.whl")
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert "application/zip" in response.headers["content-type"]
|
assert "application/zip" in response.headers["content-type"]
|
||||||
assert response.headers["X-Artifact-Source"] == "cache"
|
assert response.headers["X-Artifact-Source"] == "cache"
|
||||||
@@ -733,13 +734,13 @@ class TestPyPIRemote:
|
|||||||
deps["storage"].download_object.return_value = b"tar bytes"
|
deps["storage"].download_object.return_value = b"tar bytes"
|
||||||
deps["cache"].is_mutable_file.return_value = False
|
deps["cache"].is_mutable_file.return_value = False
|
||||||
|
|
||||||
response = client.get("/api/v1/remote/pypi-files-test/packages/requests-2.31.0.tar.gz")
|
response = client.get("/api/v1/remote/pypi-test/packages/requests-2.31.0.tar.gz")
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert "application/gzip" in response.headers["content-type"]
|
assert "application/gzip" in response.headers["content-type"]
|
||||||
|
|
||||||
def test_blocked_path_on_files_remote_returns_403(self, client, patched_deps):
|
def test_unknown_extension_on_pypi_remote_returns_403(self, client, patched_deps):
|
||||||
"""Paths that don't match immutable_patterns on pypi-files-test are blocked."""
|
"""Paths that don't match immutable_patterns and aren't mutable are blocked."""
|
||||||
response = client.get("/api/v1/remote/pypi-files-test/packages/requests.unknown")
|
response = client.get("/api/v1/remote/pypi-test/packages/requests.unknown")
|
||||||
assert response.status_code == 403
|
assert response.status_code == 403
|
||||||
|
|
||||||
|
|
||||||
@@ -821,7 +822,7 @@ class TestNpmRemote:
|
|||||||
deps["cache"].is_mutable_file.return_value = True
|
deps["cache"].is_mutable_file.return_value = True
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
return_value={"status": "cached"},
|
return_value={"status": "cached"},
|
||||||
) as mock_fetch:
|
) as mock_fetch:
|
||||||
@@ -907,7 +908,7 @@ class TestHelmRemote:
|
|||||||
deps["cache"].is_mutable_file.return_value = True
|
deps["cache"].is_mutable_file.return_value = True
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"artifactapi.main.cache_single_artifact",
|
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
return_value={"status": "cached"},
|
return_value={"status": "cached"},
|
||||||
) as mock_fetch:
|
) as mock_fetch:
|
||||||
@@ -924,3 +925,153 @@ class TestHelmRemote:
|
|||||||
|
|
||||||
response = client.get("/api/v1/remote/helm-test/vault.zip")
|
response = client.get("/api/v1/remote/helm-test/vault.zip")
|
||||||
assert response.status_code == 403
|
assert response.status_code == 403
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Quarantine (quarantine-test remote: quarantine_new=True, quarantine_days=3)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestQuarantine:
    """Route-level tests for quarantining newly published artifacts.

    Per the test docstrings below, the ``quarantine-test`` remote rejects
    artifacts whose publish date lies inside its 3-day quarantine window with
    a 404, while the ``quarantine-disabled`` remote serves them regardless.

    NOTE(review): ``client`` and ``patched_deps`` are pytest fixtures defined
    outside this class; ``patched_deps`` is used here as a mapping with
    ``"storage"`` and ``"cache"`` mock entries.
    """

    # Same artifact path requested through the two remotes under test.
    _QUARANTINED_URL = "/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz"
    _UNQUARANTINED_URL = "/api/v1/remote/quarantine-disabled/some/path/package-1.0.tar.gz"

    @staticmethod
    def _http_date(days_ago):
        """Return an HTTP-format (GMT) date string for ``days_ago`` days before now."""
        from datetime import UTC, datetime, timedelta
        from email.utils import format_datetime

        moment = datetime.now(UTC) - timedelta(days=days_ago)
        return format_datetime(moment, usegmt=True)

    def _recent_date(self, days_ago=1):
        """Return an HTTP-format date string N days in the past (within quarantine window)."""
        return self._http_date(days_ago)

    def _old_date(self, days_ago=10):
        """Return an HTTP-format date string N days in the past (outside quarantine window)."""
        return self._http_date(days_ago)

    @staticmethod
    def _stub_immutable_artifact(deps, *, cached):
        """Configure the storage/cache mocks for an immutable artifact.

        ``cached`` is what ``storage.exists`` reports, i.e. whether the
        request is treated as a cache hit (True) or a cache miss (False).
        """
        deps["storage"].exists.return_value = cached
        deps["storage"].download_object.return_value = b"content"
        deps["cache"].is_mutable_file.return_value = False

    @staticmethod
    def _get_after_upstream_fetch(client, url, last_modified):
        """GET ``url`` while ``cache_single_artifact`` is patched to succeed.

        The patched coroutine reports ``status == "cached"`` together with
        the given ``last_modified`` value (an HTTP date string or ``None``).
        """
        with patch(
            "artifactapi.artifact.proxy.cache_single_artifact",
            new_callable=AsyncMock,
            return_value={"status": "cached", "last_modified": last_modified},
        ):
            return client.get(url)

    def test_cache_miss_recent_artifact_quarantined(self, client, patched_deps):
        """Cache miss: artifact published within quarantine window → 404."""
        self._stub_immutable_artifact(patched_deps, cached=False)

        response = self._get_after_upstream_fetch(client, self._QUARANTINED_URL, self._recent_date())

        assert response.status_code == 404
        assert "quarantined" in response.json()["detail"].lower()

    def test_cache_miss_old_artifact_allowed(self, client, patched_deps):
        """Cache miss: artifact published outside quarantine window → 200."""
        self._stub_immutable_artifact(patched_deps, cached=False)

        response = self._get_after_upstream_fetch(client, self._QUARANTINED_URL, self._old_date())

        assert response.status_code == 200

    def test_cache_miss_no_last_modified_fails_open(self, client, patched_deps):
        """Cache miss: no Last-Modified header → fail open (200, not quarantined)."""
        self._stub_immutable_artifact(patched_deps, cached=False)

        response = self._get_after_upstream_fetch(client, self._QUARANTINED_URL, None)

        assert response.status_code == 200

    def test_cache_hit_recent_artifact_quarantined(self, client, patched_deps):
        """Cache hit: stored publish date within quarantine window → 404."""
        self._stub_immutable_artifact(patched_deps, cached=True)
        patched_deps["cache"].get_artifact_published.return_value = self._recent_date()

        response = client.get(self._QUARANTINED_URL)

        assert response.status_code == 404
        assert "quarantined" in response.json()["detail"].lower()

    def test_cache_hit_old_artifact_allowed(self, client, patched_deps):
        """Cache hit: stored publish date outside quarantine window → 200."""
        self._stub_immutable_artifact(patched_deps, cached=True)
        patched_deps["cache"].get_artifact_published.return_value = self._old_date()

        response = client.get(self._QUARANTINED_URL)

        assert response.status_code == 200

    def test_cache_hit_no_stored_date_fetches_upstream(self, client, patched_deps):
        """Cache hit: no stored date → HEAD upstream to get Last-Modified."""
        self._stub_immutable_artifact(patched_deps, cached=True)
        # No publish date recorded for the cached artifact, so the route has
        # to obtain one through _fetch_last_modified (patched below).
        patched_deps["cache"].get_artifact_published.return_value = None

        with patch(
            "artifactapi.artifact.proxy._fetch_last_modified",
            new_callable=AsyncMock,
            return_value=self._old_date(),
        ) as mock_fetch:
            response = client.get(self._QUARANTINED_URL)

        mock_fetch.assert_called_once()
        assert response.status_code == 200

    def test_quarantine_disabled_allows_recent_artifact(self, client, patched_deps):
        """quarantine_new=False: recent artifacts are not blocked."""
        self._stub_immutable_artifact(patched_deps, cached=False)

        response = self._get_after_upstream_fetch(client, self._UNQUARANTINED_URL, self._recent_date())

        assert response.status_code == 200

    def test_quarantine_detail_includes_available_date(self, client, patched_deps):
        """The 404 detail should include the date when the artifact becomes available."""
        self._stub_immutable_artifact(patched_deps, cached=False)

        response = self._get_after_upstream_fetch(client, self._QUARANTINED_URL, self._recent_date())

        assert response.status_code == 404
        detail = response.json()["detail"]
        assert "available after" in detail
        assert "3-day" in detail
|
||||||
|
|||||||
Reference in New Issue
Block a user