Compare commits
23 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4789635e87 | |||
| ba52fedd27 | |||
| 76633403b2 | |||
| cae3503ac4 | |||
| 3f098df428 | |||
| 64266f40e9 | |||
| be25fc19f7 | |||
| 3bd3ca8b74 | |||
| 373366e695 | |||
| e6d9b175ce | |||
| 0daca40156 | |||
| 0df726467a | |||
| b8bc7f8714 | |||
| 0c780c1bd1 | |||
| 173b5d8b10 | |||
| 3352a3e886 | |||
| 8adcbac405 | |||
| 4ca89b9159 | |||
| 25b85ddc92 | |||
| d585ab425c | |||
| 6b1a6c9eb4 | |||
| 5de912db75 | |||
| 8e9d313892 |
@@ -0,0 +1,11 @@
|
||||
remotes:
|
||||
alpine:
|
||||
base_url: "https://dl-cdn.alpinelinux.org"
|
||||
type: "remote"
|
||||
package: "alpine"
|
||||
description: "Alpine Linux APK package repository"
|
||||
immutable_patterns:
|
||||
- ".*/x86_64/.*\\.apk$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 7200
|
||||
@@ -0,0 +1,12 @@
|
||||
remotes:
|
||||
github:
|
||||
base_url: "https://github.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "GitHub releases and files"
|
||||
immutable_patterns:
|
||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 0
|
||||
@@ -0,0 +1,17 @@
|
||||
remotes:
|
||||
pypi:
|
||||
base_url: "https://files.pythonhosted.org"
|
||||
type: "remote"
|
||||
package: "pypi"
|
||||
description: "Python Package Index"
|
||||
check_mutable_updates: true
|
||||
quarantine_new: true
|
||||
quarantine_days: 3
|
||||
immutable_patterns:
|
||||
- "packages/.*\\.whl$"
|
||||
- "packages/.*\\.whl\\.metadata$"
|
||||
- "packages/.*\\.tar\\.gz$"
|
||||
- "packages/.*\\.zip$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600
|
||||
+1
-1
@@ -10,7 +10,7 @@ services:
|
||||
ports:
|
||||
- "8000:8000"
|
||||
volumes:
|
||||
- ./remotes.yaml:/app/remotes.yaml:ro,z
|
||||
- ./examples/single-file/remotes.yaml:/app/remotes.yaml:ro,z
|
||||
- ./ca-bundle.pem:/app/ca-bundle.pem:ro,z
|
||||
environment:
|
||||
- CONFIG_PATH=/app/remotes.yaml
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
remotes:
|
||||
alpine:
|
||||
base_url: "https://dl-cdn.alpinelinux.org"
|
||||
type: "remote"
|
||||
package: "alpine"
|
||||
description: "Alpine Linux APK package repository"
|
||||
immutable_patterns:
|
||||
- ".*/x86_64/.*\\.apk$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 7200
|
||||
@@ -0,0 +1,12 @@
|
||||
remotes:
|
||||
github:
|
||||
base_url: "https://github.com"
|
||||
type: "remote"
|
||||
package: "generic"
|
||||
description: "GitHub releases and files"
|
||||
immutable_patterns:
|
||||
- "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*"
|
||||
- "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 0
|
||||
@@ -0,0 +1,17 @@
|
||||
remotes:
|
||||
pypi:
|
||||
base_url: "https://files.pythonhosted.org"
|
||||
type: "remote"
|
||||
package: "pypi"
|
||||
description: "Python Package Index"
|
||||
check_mutable_updates: true
|
||||
quarantine_new: true
|
||||
quarantine_days: 3
|
||||
immutable_patterns:
|
||||
- "packages/.*\\.whl$"
|
||||
- "packages/.*\\.whl\\.metadata$"
|
||||
- "packages/.*\\.tar\\.gz$"
|
||||
- "packages/.*\\.zip$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600
|
||||
@@ -9,6 +9,13 @@
|
||||
# immutable_ttl: TTL for immutable files (0 = forever, rarely needed to change).
|
||||
# mutable_ttl: TTL in seconds for mutable files. Omit to use the default (3600).
|
||||
#
|
||||
# quarantine_new: Set to true to block immutable artifacts published within the last
|
||||
# quarantine_days days. Requests return 404 until the quarantine period
|
||||
# expires. Fails open when the publish date cannot be determined.
|
||||
# quarantine_days: Number of days to quarantine newly published artifacts (requires
|
||||
# quarantine_new: true). The upstream Last-Modified header is used as
|
||||
# the publish date.
|
||||
#
|
||||
# WARNING: this file may contain credentials — do not commit real values.
|
||||
#
|
||||
# Global configuration
|
||||
@@ -194,6 +201,73 @@ remotes:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 300
|
||||
|
||||
pypi:
|
||||
base_url: "https://files.pythonhosted.org"
|
||||
type: "remote"
|
||||
package: "pypi"
|
||||
description: "Python Package Index — simple index and package files via a single remote"
|
||||
# simple/ requests are transparently fetched from pypi.org; package files come from
|
||||
# files.pythonhosted.org (base_url). URLs in the simple index are rewritten to this remote.
|
||||
check_mutable_updates: true
|
||||
# Block packages published within the last 3 days (supply-chain attack mitigation).
|
||||
# Immutable artifacts (wheel/sdist) newer than quarantine_days return 404 until
|
||||
# the window passes. Disable by setting quarantine_new: false or removing both keys.
|
||||
quarantine_new: true
|
||||
quarantine_days: 3
|
||||
immutable_patterns:
|
||||
- "packages/.*\\.whl$"
|
||||
- "packages/.*\\.whl\\.metadata$"
|
||||
- "packages/.*\\.tar\\.gz$"
|
||||
- "packages/.*\\.zip$"
|
||||
- "packages/.*\\.egg$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600 # Simple index pages refreshed after 10 minutes
|
||||
|
||||
pypi-gitea:
|
||||
base_url: "https://gitea.example.com/api/packages/myorg/pypi"
|
||||
type: "remote"
|
||||
package: "pypi"
|
||||
description: "Private Gitea PyPI registry — simple index and files at the same host"
|
||||
# username: "your-gitea-username"
|
||||
# password: "your-personal-access-token" # needs package:read scope
|
||||
check_mutable_updates: true
|
||||
immutable_patterns:
|
||||
- "files/.*\\.whl$"
|
||||
- "files/.*\\.whl\\.metadata$"
|
||||
- "files/.*\\.tar\\.gz$"
|
||||
- "files/.*\\.zip$"
|
||||
- "files/.*\\.egg$"
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600
|
||||
|
||||
npm:
|
||||
base_url: "https://registry.npmjs.org"
|
||||
type: "remote"
|
||||
package: "npm"
|
||||
description: "npm registry — package metadata with tarball URL rewriting"
|
||||
check_mutable_updates: true
|
||||
immutable_patterns:
|
||||
- \.tgz$
|
||||
mutable_patterns:
|
||||
- ^(?!.*\.tgz$).*
|
||||
cache:
|
||||
immutable_ttl: 0
|
||||
mutable_ttl: 600 # Package metadata refreshed after 10 minutes
|
||||
|
||||
hashicorp-helm:
|
||||
base_url: "https://helm.releases.hashicorp.com"
|
||||
type: "remote"
|
||||
package: "helm"
|
||||
description: "HashiCorp Helm chart repository (Vault, Consul, Nomad, etc.)"
|
||||
check_mutable_updates: true
|
||||
immutable_patterns:
|
||||
- "\\.tgz$"
|
||||
cache:
|
||||
immutable_ttl: 0 # Chart tarballs are versioned — cache forever
|
||||
mutable_ttl: 3600 # index.yaml refreshed after 1 hour
|
||||
|
||||
local-generic:
|
||||
type: "local"
|
||||
package: "generic"
|
||||
@@ -0,0 +1,3 @@
|
||||
from . import discovery, docker, flush, local, proxy
|
||||
|
||||
__all__ = ["discovery", "docker", "flush", "local", "proxy"]
|
||||
@@ -0,0 +1,82 @@
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
from fastapi import HTTPException
|
||||
|
||||
from .proxy import cache_single_artifact
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _discover_github_releases(remote: str, include_pattern: str) -> list[str]:
|
||||
match = re.match(r"github\.com/([^/]+)/([^/]+)", remote)
|
||||
if not match:
|
||||
raise HTTPException(status_code=400, detail="Invalid GitHub remote format")
|
||||
|
||||
owner, repo = match.groups()
|
||||
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.get(f"https://api.github.com/repos/{owner}/{repo}/releases")
|
||||
if response.status_code != 200:
|
||||
raise HTTPException(status_code=response.status_code, detail=f"Failed to fetch releases: {response.text}")
|
||||
|
||||
releases = response.json()
|
||||
regex = re.compile(include_pattern.replace("*", ".*"))
|
||||
return [
|
||||
asset["browser_download_url"]
|
||||
for release in releases
|
||||
for asset in release.get("assets", [])
|
||||
if regex.search(asset["browser_download_url"])
|
||||
]
|
||||
|
||||
|
||||
async def _discover(remote: str, include_pattern: str) -> list[str]:
|
||||
if "github.com" in remote:
|
||||
return await _discover_github_releases(remote, include_pattern)
|
||||
raise HTTPException(status_code=400, detail=f"Unsupported remote: {remote}")
|
||||
|
||||
|
||||
async def cache_artifacts(remote: str, include_pattern: str, storage) -> dict[str, Any]:
|
||||
try:
|
||||
matching_urls = await _discover(remote, include_pattern)
|
||||
|
||||
if not matching_urls:
|
||||
return {"message": "No matching artifacts found", "cached_count": 0, "artifacts": []}
|
||||
|
||||
cached_artifacts = []
|
||||
for url in matching_urls:
|
||||
result = await cache_single_artifact(url, "", "", storage, {})
|
||||
cached_artifacts.append(result)
|
||||
|
||||
cached_count = sum(1 for a in cached_artifacts if a["status"] in ["cached", "already_cached"])
|
||||
return {
|
||||
"message": f"Processed {len(matching_urls)} artifacts, {cached_count} successfully cached",
|
||||
"cached_count": cached_count,
|
||||
"artifacts": cached_artifacts,
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
async def list_artifacts(remote: str, include_pattern: str, storage) -> dict[str, Any]:
|
||||
try:
|
||||
matching_urls = await _discover(remote, include_pattern)
|
||||
cached_artifacts = []
|
||||
for url in matching_urls:
|
||||
parsed = urlparse(url)
|
||||
key = storage.get_object_key(remote, parsed.path)
|
||||
if storage.exists(key):
|
||||
cached_artifacts.append({"url": url, "cached_url": storage.get_url(key), "key": key})
|
||||
|
||||
return {
|
||||
"remote": remote,
|
||||
"pattern": include_pattern,
|
||||
"total_found": len(matching_urls),
|
||||
"cached_count": len(cached_artifacts),
|
||||
"artifacts": cached_artifacts,
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
@@ -0,0 +1,103 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
||||
from fastapi import HTTPException, Request, Response
|
||||
|
||||
from . import proxy as _proxy
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def ping() -> Response:
|
||||
return Response(
|
||||
content="{}",
|
||||
media_type="application/json",
|
||||
headers={"Docker-Distribution-Api-Version": "registry/2.0"},
|
||||
)
|
||||
|
||||
|
||||
async def proxy(request: Request, remote_name: str, path: str, storage, cache, config, metrics) -> Response:
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
if remote_config.get("package") != "docker":
|
||||
raise HTTPException(status_code=400, detail=f"Remote '{remote_name}' is not a docker remote")
|
||||
|
||||
patterns = config.get_immutable_patterns(remote_name, "")
|
||||
if patterns:
|
||||
path_parts = path.split("/")
|
||||
image_name = "/".join(path_parts[:2]) if len(path_parts) >= 2 else path
|
||||
if not any(re.search(p, path) or re.search(p, image_name) for p in patterns):
|
||||
logger.info(f"PATTERN BLOCKED: {remote_name}/{path}")
|
||||
raise HTTPException(status_code=403, detail="Image not allowed by configuration patterns")
|
||||
|
||||
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||
remote_url = f"{base_url}/v2/{path}"
|
||||
|
||||
cached_key = storage.get_object_key(remote_name, path)
|
||||
if not storage.exists(cached_key):
|
||||
cached_key = None
|
||||
|
||||
is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name))
|
||||
|
||||
if cached_key and is_mutable:
|
||||
if not cache.is_index_valid(remote_name, path):
|
||||
if not await _proxy.handle_expired_mutable(remote_name, path, remote_url, config, cache, storage):
|
||||
cached_key = None
|
||||
|
||||
if not cached_key:
|
||||
logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
|
||||
result = await _proxy.cache_single_artifact(remote_url, remote_name, path, storage, remote_config)
|
||||
if result["status"] == "error":
|
||||
raise HTTPException(status_code=502, detail=f"Failed to fetch: {result['error']}")
|
||||
if result["status"] == "cached" and is_mutable:
|
||||
cache_config = config.get_cache_config(remote_name)
|
||||
mutable_ttl = cache_config.get("mutable_ttl", 3600)
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
|
||||
if result.get("etag") or result.get("last_modified"):
|
||||
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
|
||||
if not is_mutable:
|
||||
published = result.get("last_modified")
|
||||
if published:
|
||||
cache.store_artifact_published(remote_name, path, published)
|
||||
_proxy._check_quarantine(remote_name, published, config)
|
||||
elif not is_mutable:
|
||||
published = cache.get_artifact_published(remote_name, path)
|
||||
if not published:
|
||||
published = await _proxy._fetch_last_modified(remote_url, remote_config)
|
||||
if published:
|
||||
cache.store_artifact_published(remote_name, path, published)
|
||||
_proxy._check_quarantine(remote_name, published, config)
|
||||
|
||||
artifact_data = storage.download_object(storage.get_object_key(remote_name, path))
|
||||
|
||||
is_blob = "/blobs/" in path
|
||||
if is_blob:
|
||||
content_type = "application/octet-stream"
|
||||
else:
|
||||
try:
|
||||
manifest_json = json.loads(artifact_data)
|
||||
content_type = manifest_json.get("mediaType")
|
||||
if not content_type:
|
||||
if "manifests" in manifest_json:
|
||||
content_type = "application/vnd.oci.image.index.v1+json"
|
||||
else:
|
||||
content_type = "application/vnd.oci.image.manifest.v1+json"
|
||||
except Exception:
|
||||
content_type = "application/vnd.oci.image.manifest.v1+json"
|
||||
|
||||
digest = f"sha256:{hashlib.sha256(artifact_data).hexdigest()}"
|
||||
headers = {
|
||||
"Docker-Distribution-Api-Version": "registry/2.0",
|
||||
"Docker-Content-Digest": digest,
|
||||
"Content-Length": str(len(artifact_data)),
|
||||
}
|
||||
|
||||
if request.method == "HEAD":
|
||||
return Response(status_code=200, headers=headers, media_type=content_type)
|
||||
|
||||
metrics.record_cache_hit(remote_name, len(artifact_data))
|
||||
return Response(content=artifact_data, media_type=content_type, headers=headers)
|
||||
@@ -0,0 +1,66 @@
|
||||
import logging
|
||||
|
||||
from fastapi import HTTPException
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def handle(remote: str | None, cache_type: str, cache, storage) -> dict:
|
||||
try:
|
||||
result = {"remote": remote, "cache_type": cache_type, "flushed": {"redis_keys": 0, "s3_objects": 0, "operations": []}}
|
||||
|
||||
if cache_type in ["all", "index", "metrics"] and cache.available and cache.client:
|
||||
patterns = []
|
||||
|
||||
if cache_type in ["all", "index"]:
|
||||
if remote:
|
||||
patterns += [f"index:{remote}:*", f"mutable:meta:{remote}:*"]
|
||||
else:
|
||||
patterns += ["index:*", "mutable:meta:*"]
|
||||
|
||||
if cache_type in ["all", "metrics"]:
|
||||
patterns.append(f"metrics:*:{remote}" if remote else "metrics:*")
|
||||
|
||||
for pattern in patterns:
|
||||
keys = cache.client.keys(pattern)
|
||||
if keys:
|
||||
cache.client.delete(*keys)
|
||||
result["flushed"]["redis_keys"] += len(keys)
|
||||
logger.info(f"Cache flush: deleted {len(keys)} Redis keys matching '{pattern}'")
|
||||
|
||||
if result["flushed"]["redis_keys"] > 0:
|
||||
result["flushed"]["operations"].append(f"Deleted {result['flushed']['redis_keys']} Redis keys")
|
||||
|
||||
if cache_type in ["all", "files"]:
|
||||
try:
|
||||
list_params = {"Bucket": storage.bucket}
|
||||
if remote:
|
||||
list_params["Prefix"] = f"{remote}/"
|
||||
|
||||
response = storage.client.list_objects_v2(**list_params)
|
||||
if "Contents" in response:
|
||||
objects_to_delete = [obj["Key"] for obj in response["Contents"]]
|
||||
for key in objects_to_delete:
|
||||
try:
|
||||
storage.client.delete_object(Bucket=storage.bucket, Key=key)
|
||||
result["flushed"]["s3_objects"] += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to delete S3 object {key}: {e}")
|
||||
|
||||
if objects_to_delete:
|
||||
scope = f" for remote '{remote}'" if remote else ""
|
||||
result["flushed"]["operations"].append(f"Deleted {len(objects_to_delete)} S3 objects{scope}")
|
||||
logger.info(f"Cache flush: deleted {len(objects_to_delete)} S3 objects{scope}")
|
||||
|
||||
except Exception as e:
|
||||
result["flushed"]["operations"].append(f"S3 flush failed: {str(e)}")
|
||||
logger.error(f"Cache flush S3 error: {e}")
|
||||
|
||||
if not result["flushed"]["operations"]:
|
||||
result["flushed"]["operations"].append("No cache entries found to flush")
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Cache flush error: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Cache flush failed: {str(e)}")
|
||||
@@ -0,0 +1,108 @@
|
||||
import hashlib
|
||||
import logging
|
||||
|
||||
from fastapi import HTTPException, Response, UploadFile
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def upload(remote_name: str, path: str, file: UploadFile, storage, database, config) -> JSONResponse:
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
if remote_config.get("type") != "local":
|
||||
raise HTTPException(status_code=400, detail="Upload only supported for local repositories")
|
||||
|
||||
try:
|
||||
content = await file.read()
|
||||
sha256_sum = hashlib.sha256(content).hexdigest()
|
||||
|
||||
if database.file_exists(remote_name, path):
|
||||
raise HTTPException(status_code=409, detail="File already exists")
|
||||
|
||||
s3_key = f"local/{remote_name}/{path}"
|
||||
content_type = file.content_type or "application/octet-stream"
|
||||
|
||||
try:
|
||||
storage.upload(s3_key, content)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Upload failed: {e}")
|
||||
|
||||
success = database.add_local_file(
|
||||
repository_name=remote_name,
|
||||
file_path=path,
|
||||
s3_key=s3_key,
|
||||
size_bytes=len(content),
|
||||
sha256_sum=sha256_sum,
|
||||
content_type=content_type,
|
||||
)
|
||||
|
||||
if not success:
|
||||
storage.delete_object(s3_key)
|
||||
raise HTTPException(status_code=500, detail="Failed to save file metadata")
|
||||
|
||||
return JSONResponse(
|
||||
{
|
||||
"message": "File uploaded successfully",
|
||||
"file_path": path,
|
||||
"size_bytes": len(content),
|
||||
"sha256_sum": sha256_sum,
|
||||
"content_type": content_type,
|
||||
}
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
|
||||
|
||||
|
||||
def check_exists(remote_name: str, path: str, database, config) -> Response:
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
if remote_config.get("type") != "local":
|
||||
raise HTTPException(status_code=405, detail="HEAD method only supported for local repositories")
|
||||
|
||||
try:
|
||||
metadata = database.get_local_file_metadata(remote_name, path)
|
||||
if not metadata:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
|
||||
return Response(
|
||||
headers={
|
||||
"Content-Length": str(metadata["size_bytes"]),
|
||||
"Content-Type": metadata.get("content_type", "application/octet-stream"),
|
||||
"X-SHA256": metadata["sha256_sum"],
|
||||
"X-Created-At": metadata["created_at"].isoformat() if metadata["created_at"] else "",
|
||||
"X-Uploaded-At": metadata["uploaded_at"].isoformat() if metadata["uploaded_at"] else "",
|
||||
}
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")
|
||||
|
||||
|
||||
def delete(remote_name: str, path: str, storage, database, config) -> JSONResponse:
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
if remote_config.get("type") != "local":
|
||||
raise HTTPException(status_code=400, detail="Delete only supported for local repositories")
|
||||
|
||||
try:
|
||||
s3_key = database.delete_local_file(remote_name, path)
|
||||
if not s3_key:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
|
||||
if not storage.delete_object(s3_key):
|
||||
logger.warning(f"Failed to delete S3 object {s3_key} after database removal")
|
||||
|
||||
return JSONResponse({"message": "File deleted successfully"})
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Delete failed: {str(e)}")
|
||||
@@ -0,0 +1,331 @@
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
import httpx
|
||||
from fastapi import HTTPException, Request, Response
|
||||
|
||||
from ..auth import get_docker_token_for_response
|
||||
from ..remote import helm as _helm
|
||||
from ..remote import npm as _npm
|
||||
from ..remote import python as _pypi
|
||||
from ..remote.base import get_content_type
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UpstreamUnreachable(Exception):
|
||||
"""Raised when the upstream backend cannot be contacted (network or timeout error)."""
|
||||
|
||||
|
||||
def _check_quarantine(remote_name: str, last_modified_str: str | None, config) -> None:
|
||||
"""Raise HTTP 404 if the artifact is within the per-remote quarantine window.
|
||||
|
||||
Fails open (allows the request) when the publish date cannot be determined.
|
||||
"""
|
||||
enabled, days = config.get_quarantine_config(remote_name)
|
||||
if not enabled or not days:
|
||||
return
|
||||
if not last_modified_str:
|
||||
return # cannot determine age → allow
|
||||
try:
|
||||
publish_date = parsedate_to_datetime(last_modified_str)
|
||||
except Exception:
|
||||
return # unparseable → allow
|
||||
cutoff = datetime.now(UTC) - timedelta(days=days)
|
||||
if publish_date > cutoff:
|
||||
available_on = (publish_date + timedelta(days=days)).date()
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=(
|
||||
f"Package quarantined: published {publish_date.date()}, available after {available_on} ({days}-day new-release quarantine)"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
async def _fetch_last_modified(remote_url: str, remote_cfg: dict) -> str | None:
|
||||
"""HEAD the upstream URL and return the Last-Modified header, or None on any failure."""
|
||||
auth = _basic_auth_header(remote_cfg)
|
||||
try:
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.head(remote_url, headers=auth, timeout=10.0)
|
||||
return response.headers.get("Last-Modified")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _basic_auth_header(remote_cfg: dict) -> dict[str, str]:
|
||||
username = remote_cfg.get("username")
|
||||
password = remote_cfg.get("password")
|
||||
if username and password:
|
||||
token = base64.b64encode(f"{username}:{password}".encode()).decode()
|
||||
return {"Authorization": f"Basic {token}"}
|
||||
return {}
|
||||
|
||||
|
||||
def _resolve_content(
|
||||
data: bytes,
|
||||
path: str,
|
||||
filename: str,
|
||||
remote_config: dict,
|
||||
request: Request,
|
||||
remote_name: str = "",
|
||||
) -> tuple[bytes, str]:
|
||||
package = remote_config.get("package")
|
||||
proxy_base = str(request.base_url).rstrip("/")
|
||||
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||
|
||||
if package == "pypi":
|
||||
return _pypi.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name)
|
||||
if package == "npm":
|
||||
return _npm.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name)
|
||||
if package == "helm":
|
||||
return _helm.resolve_content(data, path, filename, base_url, proxy_base, remote_name)
|
||||
return data, get_content_type(filename)
|
||||
|
||||
|
||||
def construct_url(remote_config: dict, path: str) -> str:
|
||||
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||
if remote_config.get("package") == "docker":
|
||||
return f"{base_url}/v2/{path}"
|
||||
if remote_config.get("package") == "pypi":
|
||||
return _pypi.construct_url(base_url, path)
|
||||
return f"{base_url}/{path}"
|
||||
|
||||
|
||||
async def cache_single_artifact(url: str, remote_name: str, path: str, storage, remote_config: dict) -> dict:
|
||||
key = storage.get_object_key(remote_name, path)
|
||||
|
||||
if storage.exists(key):
|
||||
logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})")
|
||||
return {"url": url, "cached_url": storage.get_url(key), "status": "already_cached"}
|
||||
|
||||
try:
|
||||
is_docker = remote_config.get("package") == "docker" or "/v2/" in url
|
||||
headers = {}
|
||||
username = remote_config.get("username")
|
||||
password = remote_config.get("password")
|
||||
|
||||
if is_docker:
|
||||
if "/manifests/" in url:
|
||||
headers["Accept"] = (
|
||||
"application/vnd.docker.distribution.manifest.v2+json,"
|
||||
"application/vnd.oci.image.manifest.v1+json,"
|
||||
"application/vnd.oci.image.index.v1+json,"
|
||||
"application/vnd.docker.distribution.manifest.list.v2+json"
|
||||
)
|
||||
elif "/blobs/" in url:
|
||||
headers["Accept"] = "application/octet-stream"
|
||||
elif username and password:
|
||||
headers["Authorization"] = "Basic " + base64.b64encode(f"{username}:{password}".encode()).decode()
|
||||
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.get(url, headers=headers)
|
||||
|
||||
if response.status_code == 401 and is_docker:
|
||||
www_auth = response.headers.get("WWW-Authenticate", "")
|
||||
token = await get_docker_token_for_response(www_auth, username, password)
|
||||
if token:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
response = await client.get(url, headers=headers)
|
||||
|
||||
response.raise_for_status()
|
||||
storage.upload(key, response.content)
|
||||
logger.info(f"Cache ADD SUCCESS: {url} (size: {len(response.content)} bytes, key: {key})")
|
||||
|
||||
return {
|
||||
"url": url,
|
||||
"cached_url": storage.get_url(key),
|
||||
"storage_path": f"s3://{storage.bucket}/{key}",
|
||||
"size": len(response.content),
|
||||
"status": "cached",
|
||||
"etag": response.headers.get("ETag"),
|
||||
"last_modified": response.headers.get("Last-Modified"),
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {"url": url, "status": "error", "error": str(e)}
|
||||
|
||||
|
||||
async def _upstream_reachable(url: str, auth_headers: dict | None = None) -> bool:
|
||||
try:
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
await client.head(url, headers=auth_headers or {}, timeout=10.0)
|
||||
return True
|
||||
except (httpx.NetworkError, httpx.TimeoutException):
|
||||
return False
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
|
||||
async def check_upstream_changed(remote_url: str, remote_name: str, path: str, cache, auth_headers: dict | None = None) -> bool:
|
||||
meta = cache.get_mutable_meta(remote_name, path)
|
||||
if not meta:
|
||||
return True
|
||||
|
||||
headers = dict(auth_headers or {})
|
||||
if meta.get("etag"):
|
||||
headers["If-None-Match"] = meta["etag"]
|
||||
if meta.get("last_modified"):
|
||||
headers["If-Modified-Since"] = meta["last_modified"]
|
||||
if not (meta.get("etag") or meta.get("last_modified")):
|
||||
return True
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.head(remote_url, headers=headers)
|
||||
return response.status_code != 304
|
||||
except (httpx.NetworkError, httpx.TimeoutException) as exc:
|
||||
raise UpstreamUnreachable(str(exc)) from exc
|
||||
|
||||
|
||||
async def handle_expired_mutable(remote_name: str, path: str, remote_url: str, config, cache, storage) -> bool:
|
||||
"""Handle an expired mutable file. Returns True if the cached copy is still valid."""
|
||||
mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
|
||||
remote_cfg = config.get_remote_config(remote_name) or {}
|
||||
auth = _basic_auth_header(remote_cfg)
|
||||
check_updates = remote_cfg.get("check_mutable_updates", False)
|
||||
user_mutable = check_updates and cache.is_mutable_file(path, config.get_user_mutable_patterns(remote_name))
|
||||
|
||||
if user_mutable:
|
||||
try:
|
||||
changed = await check_upstream_changed(remote_url, remote_name, path, cache, auth)
|
||||
except UpstreamUnreachable:
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)")
|
||||
return True
|
||||
if not changed:
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)")
|
||||
return True
|
||||
logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")
|
||||
else:
|
||||
if not await _upstream_reachable(remote_url, auth):
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)")
|
||||
return True
|
||||
logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")
|
||||
|
||||
cache.cleanup_expired_index(storage, remote_name, path)
|
||||
return False
|
||||
|
||||
|
||||
async def handle(request: Request, remote_name: str, path: str, storage, cache, config, database, metrics) -> Response:
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
if remote_config.get("type") == "local":
|
||||
metadata = database.get_local_file_metadata(remote_name, path)
|
||||
if not metadata:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
content = storage.download_object(metadata["s3_key"])
|
||||
if content is None:
|
||||
raise HTTPException(status_code=500, detail="File not accessible")
|
||||
return Response(
|
||||
content=content,
|
||||
media_type=metadata.get("content_type", "application/octet-stream"),
|
||||
headers={"Content-Disposition": f"attachment; filename={os.path.basename(path)}"},
|
||||
)
|
||||
|
||||
path_parts = path.split("/")
|
||||
if len(path_parts) >= 2:
|
||||
repo_path = f"{path_parts[0]}/{path_parts[1]}"
|
||||
file_path = "/".join(path_parts[2:])
|
||||
else:
|
||||
repo_path = path
|
||||
file_path = path
|
||||
|
||||
mutable_patterns = config.get_mutable_patterns(remote_name)
|
||||
if not cache.is_mutable_file(file_path, mutable_patterns) and not cache.is_mutable_file(path, mutable_patterns):
|
||||
patterns = config.get_immutable_patterns(remote_name, repo_path)
|
||||
if patterns and not any(re.search(p, file_path) or re.search(p, path) for p in patterns):
|
||||
logger.info(f"PATTERN BLOCKED: {remote_name}/{path} - not matching include patterns")
|
||||
raise HTTPException(status_code=403, detail="Artifact not allowed by configuration patterns")
|
||||
|
||||
remote_url = construct_url(remote_config, path)
|
||||
if not remote_config.get("base_url"):
|
||||
raise HTTPException(status_code=500, detail=f"No base_url configured for remote '{remote_name}'")
|
||||
|
||||
cached_key = storage.get_object_key(remote_name, path)
|
||||
if not storage.exists(cached_key):
|
||||
cached_key = None
|
||||
|
||||
filename = os.path.basename(path)
|
||||
is_mutable = cache.is_mutable_file(path, mutable_patterns)
|
||||
|
||||
if cached_key and is_mutable:
|
||||
if not cache.is_index_valid(remote_name, path):
|
||||
if not await handle_expired_mutable(remote_name, path, remote_url, config, cache, storage):
|
||||
cached_key = None
|
||||
|
||||
if cached_key:
|
||||
if not is_mutable:
|
||||
published = cache.get_artifact_published(remote_name, path)
|
||||
if not published:
|
||||
published = await _fetch_last_modified(remote_url, remote_config)
|
||||
if published:
|
||||
cache.store_artifact_published(remote_name, path, published)
|
||||
_check_quarantine(remote_name, published, config)
|
||||
|
||||
try:
|
||||
artifact_data = storage.download_object(cached_key)
|
||||
artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
|
||||
logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})")
|
||||
metrics.record_cache_hit(remote_name, len(artifact_data))
|
||||
database.record_artifact_mapping(cached_key, remote_name, path, len(artifact_data))
|
||||
return Response(
|
||||
content=artifact_data,
|
||||
media_type=content_type,
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename={filename}",
|
||||
"X-Artifact-Source": "cache",
|
||||
"X-Artifact-Size": str(len(artifact_data)),
|
||||
},
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Error retrieving cached artifact: {str(e)}")
|
||||
|
||||
logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
|
||||
result = await cache_single_artifact(remote_url, remote_name, path, storage, remote_config)
|
||||
|
||||
if result["status"] == "error":
|
||||
logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}")
|
||||
raise HTTPException(status_code=502, detail=f"Failed to fetch artifact: {result['error']}")
|
||||
|
||||
if result["status"] == "cached" and is_mutable:
|
||||
cache_config = config.get_cache_config(remote_name)
|
||||
mutable_ttl = cache_config.get("mutable_ttl", 3600)
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
|
||||
if result.get("etag") or result.get("last_modified"):
|
||||
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
|
||||
|
||||
if not is_mutable:
|
||||
published = result.get("last_modified")
|
||||
if published:
|
||||
cache.store_artifact_published(remote_name, path, published)
|
||||
_check_quarantine(remote_name, published, config)
|
||||
|
||||
try:
|
||||
cache_key = storage.get_object_key(remote_name, path)
|
||||
artifact_data = storage.download_object(cache_key)
|
||||
artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
|
||||
metrics.record_cache_miss(remote_name, len(artifact_data))
|
||||
database.record_artifact_mapping(cache_key, remote_name, path, len(artifact_data))
|
||||
return Response(
|
||||
content=artifact_data,
|
||||
media_type=content_type,
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename={filename}",
|
||||
"X-Artifact-Source": "remote",
|
||||
"X-Artifact-Size": str(len(artifact_data)),
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Error serving artifact: {str(e)}")
|
||||
@@ -0,0 +1,3 @@
|
||||
from .docker import fetch_token, get_docker_token_for_response, parse_www_authenticate
|
||||
|
||||
__all__ = ["fetch_token", "get_docker_token_for_response", "parse_www_authenticate"]
|
||||
@@ -0,0 +1,96 @@
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# In-memory token cache: key -> (token, expires_at)
|
||||
_token_cache: dict[str, tuple[str, float]] = {}
|
||||
|
||||
_WWW_AUTH_RE = re.compile(
|
||||
r'Bearer\s+realm="(?P<realm>[^"]+)"'
|
||||
r'(?:,service="(?P<service>[^"]*)")?'
|
||||
r'(?:,scope="(?P<scope>[^"]*)")?',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _cache_key(realm: str, service: str, scope: str, username: str | None) -> str:
|
||||
return f"{realm}|{service}|{scope}|{username or ''}"
|
||||
|
||||
|
||||
def _get_cached_token(key: str) -> str | None:
|
||||
entry = _token_cache.get(key)
|
||||
if entry and entry[1] > time.time():
|
||||
return entry[0]
|
||||
_token_cache.pop(key, None)
|
||||
return None
|
||||
|
||||
|
||||
def _store_token(key: str, token: str, expires_in: int) -> None:
|
||||
# Expire 30s early to avoid using a token right as it expires
|
||||
_token_cache[key] = (token, time.time() + max(expires_in - 30, 10))
|
||||
|
||||
|
||||
async def fetch_token(
|
||||
realm: str,
|
||||
service: str,
|
||||
scope: str,
|
||||
username: str | None = None,
|
||||
password: str | None = None,
|
||||
) -> str | None:
|
||||
"""Fetch a Bearer token from a Docker registry auth server."""
|
||||
key = _cache_key(realm, service, scope, username)
|
||||
cached = _get_cached_token(key)
|
||||
if cached:
|
||||
return cached
|
||||
|
||||
params: dict[str, str] = {}
|
||||
if service:
|
||||
params["service"] = service
|
||||
if scope:
|
||||
params["scope"] = scope
|
||||
|
||||
auth = (username, password) if username and password else None
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.get(realm, params=params, auth=auth)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
except Exception as e:
|
||||
logger.warning(f"Docker token fetch failed ({realm}): {e}")
|
||||
return None
|
||||
|
||||
token = data.get("token") or data.get("access_token")
|
||||
if not token:
|
||||
logger.warning(f"Docker token response missing token field: {data}")
|
||||
return None
|
||||
|
||||
expires_in = int(data.get("expires_in", 300))
|
||||
_store_token(key, token, expires_in)
|
||||
logger.debug(f"Docker token obtained (realm={realm}, service={service}, scope={scope}, expires_in={expires_in}s)")
|
||||
return token
|
||||
|
||||
|
||||
def parse_www_authenticate(header: str) -> tuple[str, str, str] | None:
|
||||
"""Parse WWW-Authenticate: Bearer header. Returns (realm, service, scope) or None."""
|
||||
m = _WWW_AUTH_RE.search(header)
|
||||
if not m:
|
||||
return None
|
||||
return m.group("realm"), m.group("service") or "", m.group("scope") or ""
|
||||
|
||||
|
||||
async def get_docker_token_for_response(
|
||||
www_authenticate: str,
|
||||
username: str | None = None,
|
||||
password: str | None = None,
|
||||
) -> str | None:
|
||||
"""Given a WWW-Authenticate header value, fetch and return a Bearer token."""
|
||||
parsed = parse_www_authenticate(www_authenticate)
|
||||
if not parsed:
|
||||
return None
|
||||
realm, service, scope = parsed
|
||||
return await fetch_token(realm, service, scope, username, password)
|
||||
Vendored
+3
@@ -0,0 +1,3 @@
|
||||
from .redis import RedisCache
|
||||
|
||||
__all__ = ["RedisCache"]
|
||||
+22
-10
@@ -11,7 +11,6 @@ class RedisCache:
|
||||
|
||||
try:
|
||||
self.client = redis.from_url(self.redis_url, decode_responses=True)
|
||||
# Test connection
|
||||
self.client.ping()
|
||||
self.available = True
|
||||
except Exception as e:
|
||||
@@ -20,7 +19,6 @@ class RedisCache:
|
||||
self.available = False
|
||||
|
||||
def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
|
||||
"""Return True if file_path matches any of the mutable patterns."""
|
||||
if patterns is None:
|
||||
patterns = []
|
||||
return any(re.search(p, file_path) for p in patterns)
|
||||
@@ -32,10 +30,8 @@ class RedisCache:
|
||||
return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
||||
|
||||
def is_index_valid(self, remote_name: str, path: str) -> bool:
|
||||
"""Check if mutable file is still within its TTL window."""
|
||||
if not self.available:
|
||||
return False
|
||||
|
||||
try:
|
||||
key = self.get_index_cache_key(remote_name, path)
|
||||
return self.client.exists(key) > 0
|
||||
@@ -43,10 +39,8 @@ class RedisCache:
|
||||
return False
|
||||
|
||||
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
|
||||
"""Set or refresh the TTL key for a mutable file."""
|
||||
if not self.available:
|
||||
return
|
||||
|
||||
try:
|
||||
key = self.get_index_cache_key(remote_name, path)
|
||||
self.client.setex(key, ttl, str(int(time.time())))
|
||||
@@ -54,7 +48,6 @@ class RedisCache:
|
||||
pass
|
||||
|
||||
def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None:
|
||||
"""Persist ETag and Last-Modified for future conditional requests."""
|
||||
if not self.available:
|
||||
return
|
||||
data = {}
|
||||
@@ -70,7 +63,6 @@ class RedisCache:
|
||||
pass
|
||||
|
||||
def get_mutable_meta(self, remote_name: str, path: str) -> dict:
|
||||
"""Return stored ETag/Last-Modified for a mutable file, or {}."""
|
||||
if not self.available:
|
||||
return {}
|
||||
try:
|
||||
@@ -86,15 +78,35 @@ class RedisCache:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def get_artifact_published_key(self, remote_name: str, path: str) -> str:
|
||||
return f"pkg:published:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
||||
|
||||
def store_artifact_published(self, remote_name: str, path: str, last_modified: str) -> None:
|
||||
"""Persist the upstream Last-Modified header for a (typically immutable) artifact."""
|
||||
if not self.available:
|
||||
return
|
||||
try:
|
||||
self.client.set(self.get_artifact_published_key(remote_name, path), last_modified)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def get_artifact_published(self, remote_name: str, path: str) -> str | None:
|
||||
"""Return the stored Last-Modified string for an artifact, or None."""
|
||||
if not self.available:
|
||||
return None
|
||||
try:
|
||||
return self.client.get(self.get_artifact_published_key(remote_name, path))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
|
||||
"""Remove an expired mutable file from S3 and clear its Redis meta."""
|
||||
if not self.available:
|
||||
return
|
||||
|
||||
try:
|
||||
import os
|
||||
|
||||
from .config import ConfigManager
|
||||
from ..config import ConfigManager
|
||||
|
||||
config_path = os.environ.get("CONFIG_PATH")
|
||||
if config_path:
|
||||
+91
-14
@@ -1,3 +1,4 @@
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
|
||||
@@ -18,36 +19,99 @@ _PACKAGE_MUTABLE_PATTERNS: dict[str, list[str]] = {
|
||||
r"/manifests/(?!sha256:)[^/]+$",
|
||||
r"/tags/list$",
|
||||
],
|
||||
"pypi": [
|
||||
r"simple/", # Per-package and top-level simple index pages
|
||||
],
|
||||
"npm": [],
|
||||
"helm": [
|
||||
r"index\.yaml$",
|
||||
],
|
||||
"generic": [],
|
||||
}
|
||||
|
||||
|
||||
class ConfigManager:
|
||||
def __init__(self, config_file: str = "remotes.yaml"):
|
||||
self.config_file = config_file
|
||||
self._last_modified = 0
|
||||
def __init__(self, config_path: str = "remotes.yaml"):
|
||||
self.config_path = config_path
|
||||
self._config_dir: str | None = None
|
||||
self._last_modified: float = 0.0
|
||||
self.config = self._load_config()
|
||||
|
||||
def _load_config(self) -> dict:
|
||||
def _load_single_file(self, path: str) -> dict:
|
||||
try:
|
||||
with open(self.config_file) as f:
|
||||
if self.config_file.endswith(".yaml") or self.config_file.endswith(".yml"):
|
||||
return yaml.safe_load(f)
|
||||
else:
|
||||
with open(path) as f:
|
||||
if path.endswith((".yaml", ".yml")):
|
||||
return yaml.safe_load(f) or {}
|
||||
return json.load(f)
|
||||
except FileNotFoundError:
|
||||
return {}
|
||||
|
||||
@staticmethod
|
||||
def _merge(base: dict, overlay: dict) -> dict:
|
||||
result = {**base}
|
||||
for key, value in overlay.items():
|
||||
if key == "remotes" and isinstance(base.get("remotes"), dict) and isinstance(value, dict):
|
||||
result["remotes"] = {**base.get("remotes", {}), **value}
|
||||
else:
|
||||
result[key] = value
|
||||
return result
|
||||
|
||||
def _load_from_dir(self, dir_path: str) -> dict:
|
||||
merged: dict = {}
|
||||
files = sorted(glob.glob(os.path.join(dir_path, "*.yaml")) + glob.glob(os.path.join(dir_path, "*.yml")))
|
||||
for path in files:
|
||||
merged = self._merge(merged, self._load_single_file(path))
|
||||
return merged
|
||||
|
||||
def _load_config(self) -> dict:
|
||||
self._config_dir = None
|
||||
|
||||
if os.path.isdir(self.config_path):
|
||||
return self._load_from_dir(self.config_path) or {"remotes": {}}
|
||||
|
||||
config = self._load_single_file(self.config_path)
|
||||
if not config:
|
||||
return {"remotes": {}}
|
||||
|
||||
def _check_reload(self) -> None:
|
||||
"""Check if config file has been modified and reload if needed"""
|
||||
try:
|
||||
import os
|
||||
config_dir = config.pop("config_dir", None)
|
||||
if config_dir:
|
||||
if not os.path.isabs(config_dir):
|
||||
config_dir = os.path.join(os.path.dirname(os.path.abspath(self.config_path)), config_dir)
|
||||
self._config_dir = config_dir
|
||||
config = self._merge(config, self._load_from_dir(config_dir))
|
||||
|
||||
current_modified = os.path.getmtime(self.config_file)
|
||||
return config
|
||||
|
||||
def _file_mtimes(self) -> list[float]:
|
||||
mtimes: list[float] = []
|
||||
if os.path.isdir(self.config_path):
|
||||
for f in glob.glob(os.path.join(self.config_path, "*.yaml")) + glob.glob(os.path.join(self.config_path, "*.yml")):
|
||||
try:
|
||||
mtimes.append(os.path.getmtime(f))
|
||||
except OSError:
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
mtimes.append(os.path.getmtime(self.config_path))
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
if self._config_dir and os.path.isdir(self._config_dir):
|
||||
for f in glob.glob(os.path.join(self._config_dir, "*.yaml")) + glob.glob(os.path.join(self._config_dir, "*.yml")):
|
||||
try:
|
||||
mtimes.append(os.path.getmtime(f))
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
return mtimes
|
||||
|
||||
def _check_reload(self) -> None:
|
||||
try:
|
||||
current_modified = max(self._file_mtimes(), default=0.0)
|
||||
if current_modified > self._last_modified:
|
||||
self._last_modified = current_modified
|
||||
self.config = self._load_config()
|
||||
print(f"Config reloaded from {self.config_file}")
|
||||
print(f"Config reloaded from {self.config_path}")
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
@@ -152,3 +216,16 @@ class ConfigManager:
|
||||
return {}
|
||||
|
||||
return remote_config.get("cache", {})
|
||||
|
||||
def get_quarantine_config(self, remote_name: str) -> tuple[bool, int]:
|
||||
"""Return (enabled, quarantine_days) for a remote.
|
||||
|
||||
When enabled=True and quarantine_days>0, immutable artifacts published
|
||||
within the last quarantine_days days are blocked with a 404.
|
||||
"""
|
||||
remote_config = self.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
return False, 0
|
||||
enabled = bool(remote_config.get("quarantine_new", False))
|
||||
days = int(remote_config.get("quarantine_days", 0))
|
||||
return enabled, days
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
from .postgres import DatabaseManager
|
||||
|
||||
__all__ = ["DatabaseManager"]
|
||||
@@ -9,7 +9,6 @@ class DatabaseManager:
|
||||
self._init_database()
|
||||
|
||||
def _init_database(self):
|
||||
"""Initialize database connection and create schema if needed"""
|
||||
try:
|
||||
self.connection = psycopg2.connect(self.db_url)
|
||||
self.connection.autocommit = True
|
||||
@@ -21,10 +20,8 @@ class DatabaseManager:
|
||||
self.available = False
|
||||
|
||||
def _create_schema(self):
|
||||
"""Create tables if they don't exist"""
|
||||
try:
|
||||
with self.connection.cursor() as cursor:
|
||||
# Create table to map S3 keys to remote names
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS artifact_mappings (
|
||||
id SERIAL PRIMARY KEY,
|
||||
@@ -51,7 +48,6 @@ class DatabaseManager:
|
||||
)
|
||||
""")
|
||||
|
||||
# Create indexes separately
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)")
|
||||
@@ -61,7 +57,6 @@ class DatabaseManager:
|
||||
print(f"Error creating schema: {e}")
|
||||
|
||||
def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int):
|
||||
"""Record mapping between S3 key and remote"""
|
||||
if not self.available:
|
||||
return
|
||||
|
||||
@@ -83,7 +78,6 @@ class DatabaseManager:
|
||||
print(f"Error recording artifact mapping: {e}")
|
||||
|
||||
def get_storage_by_remote(self) -> dict[str, int]:
|
||||
"""Get storage size breakdown by remote from database"""
|
||||
if not self.available:
|
||||
return {}
|
||||
|
||||
@@ -101,7 +95,6 @@ class DatabaseManager:
|
||||
return {}
|
||||
|
||||
def get_remote_for_s3_key(self, s3_key: str) -> str | None:
|
||||
"""Get remote name for given S3 key"""
|
||||
if not self.available:
|
||||
return None
|
||||
|
||||
@@ -126,7 +119,6 @@ class DatabaseManager:
|
||||
sha256_sum: str,
|
||||
content_type: str = None,
|
||||
):
|
||||
"""Add a file to local repository"""
|
||||
if not self.available:
|
||||
return False
|
||||
|
||||
@@ -153,7 +145,6 @@ class DatabaseManager:
|
||||
return False
|
||||
|
||||
def get_local_file_metadata(self, repository_name: str, file_path: str):
|
||||
"""Get metadata for a local file"""
|
||||
if not self.available:
|
||||
return None
|
||||
|
||||
@@ -185,7 +176,6 @@ class DatabaseManager:
|
||||
return None
|
||||
|
||||
def list_local_files(self, repository_name: str, prefix: str = ""):
|
||||
"""List files in local repository with optional path prefix"""
|
||||
if not self.available:
|
||||
return []
|
||||
|
||||
@@ -229,7 +219,6 @@ class DatabaseManager:
|
||||
return []
|
||||
|
||||
def delete_local_file(self, repository_name: str, file_path: str):
|
||||
"""Delete a file from local repository"""
|
||||
if not self.available:
|
||||
return False
|
||||
|
||||
@@ -251,7 +240,6 @@ class DatabaseManager:
|
||||
return None
|
||||
|
||||
def file_exists(self, repository_name: str, file_path: str):
|
||||
"""Check if file exists in local repository"""
|
||||
if not self.available:
|
||||
return False
|
||||
|
||||
@@ -1,96 +1,19 @@
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# In-memory token cache: key -> (token, expires_at)
|
||||
_token_cache: dict[str, tuple[str, float]] = {}
|
||||
|
||||
_WWW_AUTH_RE = re.compile(
|
||||
r'Bearer\s+realm="(?P<realm>[^"]+)"'
|
||||
r'(?:,service="(?P<service>[^"]*)")?'
|
||||
r'(?:,scope="(?P<scope>[^"]*)")?',
|
||||
re.IGNORECASE,
|
||||
from .auth.docker import (
|
||||
_cache_key,
|
||||
_get_cached_token,
|
||||
_store_token,
|
||||
_token_cache,
|
||||
fetch_token,
|
||||
get_docker_token_for_response,
|
||||
parse_www_authenticate,
|
||||
)
|
||||
|
||||
|
||||
def _cache_key(realm: str, service: str, scope: str, username: str | None) -> str:
|
||||
return f"{realm}|{service}|{scope}|{username or ''}"
|
||||
|
||||
|
||||
def _get_cached_token(key: str) -> str | None:
|
||||
entry = _token_cache.get(key)
|
||||
if entry and entry[1] > time.time():
|
||||
return entry[0]
|
||||
_token_cache.pop(key, None)
|
||||
return None
|
||||
|
||||
|
||||
def _store_token(key: str, token: str, expires_in: int) -> None:
|
||||
# Expire 30s early to avoid using a token right as it expires
|
||||
_token_cache[key] = (token, time.time() + max(expires_in - 30, 10))
|
||||
|
||||
|
||||
async def fetch_token(
|
||||
realm: str,
|
||||
service: str,
|
||||
scope: str,
|
||||
username: str | None = None,
|
||||
password: str | None = None,
|
||||
) -> str | None:
|
||||
"""Fetch a Bearer token from a Docker registry auth server."""
|
||||
key = _cache_key(realm, service, scope, username)
|
||||
cached = _get_cached_token(key)
|
||||
if cached:
|
||||
return cached
|
||||
|
||||
params: dict[str, str] = {}
|
||||
if service:
|
||||
params["service"] = service
|
||||
if scope:
|
||||
params["scope"] = scope
|
||||
|
||||
auth = (username, password) if username and password else None
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.get(realm, params=params, auth=auth)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
except Exception as e:
|
||||
logger.warning(f"Docker token fetch failed ({realm}): {e}")
|
||||
return None
|
||||
|
||||
token = data.get("token") or data.get("access_token")
|
||||
if not token:
|
||||
logger.warning(f"Docker token response missing token field: {data}")
|
||||
return None
|
||||
|
||||
expires_in = int(data.get("expires_in", 300))
|
||||
_store_token(key, token, expires_in)
|
||||
logger.debug(f"Docker token obtained (realm={realm}, service={service}, scope={scope}, expires_in={expires_in}s)")
|
||||
return token
|
||||
|
||||
|
||||
def parse_www_authenticate(header: str) -> tuple[str, str, str] | None:
|
||||
"""Parse WWW-Authenticate: Bearer header. Returns (realm, service, scope) or None."""
|
||||
m = _WWW_AUTH_RE.search(header)
|
||||
if not m:
|
||||
return None
|
||||
return m.group("realm"), m.group("service") or "", m.group("scope") or ""
|
||||
|
||||
|
||||
async def get_docker_token_for_response(
|
||||
www_authenticate: str,
|
||||
username: str | None = None,
|
||||
password: str | None = None,
|
||||
) -> str | None:
|
||||
"""Given a WWW-Authenticate header value, fetch and return a Bearer token."""
|
||||
parsed = parse_www_authenticate(www_authenticate)
|
||||
if not parsed:
|
||||
return None
|
||||
realm, service, scope = parsed
|
||||
return await fetch_token(realm, service, scope, username, password)
|
||||
__all__ = [
|
||||
"_cache_key",
|
||||
"_get_cached_token",
|
||||
"_store_token",
|
||||
"_token_cache",
|
||||
"fetch_token",
|
||||
"get_docker_token_for_response",
|
||||
"parse_www_authenticate",
|
||||
]
|
||||
|
||||
+51
-738
@@ -1,13 +1,8 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from fastapi import FastAPI, File, HTTPException, Query, Request, Response, UploadFile
|
||||
from fastapi.responses import JSONResponse, PlainTextResponse
|
||||
from fastapi import FastAPI, File, Query, Request, UploadFile
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -16,58 +11,45 @@ try:
|
||||
|
||||
__version__ = version("artifactapi")
|
||||
except ImportError:
|
||||
# Fallback for development when package isn't installed
|
||||
__version__ = "dev"
|
||||
|
||||
from .artifact import discovery, flush, local, proxy
|
||||
from .artifact import docker as docker_handler
|
||||
from .cache import RedisCache
|
||||
from .config import ConfigManager
|
||||
from .database import DatabaseManager
|
||||
from .docker_auth import get_docker_token_for_response
|
||||
from .metrics import MetricsManager
|
||||
from .storage import S3Storage
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = FastAPI(title="Artifact Storage API", version=__version__)
|
||||
|
||||
config_path = os.environ.get("CONFIG_PATH")
|
||||
if not config_path:
|
||||
raise ValueError("CONFIG_PATH environment variable is required")
|
||||
config = ConfigManager(config_path)
|
||||
|
||||
s3_config = config.get_s3_config()
|
||||
redis_config = config.get_redis_config()
|
||||
db_config = config.get_database_config()
|
||||
|
||||
storage = S3Storage(**s3_config)
|
||||
cache = RedisCache(redis_config["url"])
|
||||
database = DatabaseManager(db_config["url"])
|
||||
metrics = MetricsManager(cache, database)
|
||||
|
||||
|
||||
class ArtifactRequest(BaseModel):
|
||||
remote: str
|
||||
include_pattern: str
|
||||
|
||||
|
||||
class UpstreamUnreachable(Exception):
|
||||
"""Raised when the upstream backend cannot be contacted (network or timeout error)."""
|
||||
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = FastAPI(title="Artifact Storage API", version=__version__)
|
||||
|
||||
# Initialize components using config
|
||||
config_path = os.environ.get("CONFIG_PATH")
|
||||
if not config_path:
|
||||
raise ValueError("CONFIG_PATH environment variable is required")
|
||||
config = ConfigManager(config_path)
|
||||
|
||||
# Get configurations
|
||||
s3_config = config.get_s3_config()
|
||||
redis_config = config.get_redis_config()
|
||||
db_config = config.get_database_config()
|
||||
|
||||
# Initialize services
|
||||
storage = S3Storage(**s3_config)
|
||||
cache = RedisCache(redis_config["url"])
|
||||
database = DatabaseManager(db_config["url"])
|
||||
metrics = MetricsManager(cache, database)
|
||||
|
||||
|
||||
@app.get("/")
|
||||
def read_root():
|
||||
config._check_reload()
|
||||
return {
|
||||
"message": "Artifact Storage API",
|
||||
"version": app.version,
|
||||
"remotes": list(config.config.get("remotes", {}).keys()),
|
||||
}
|
||||
return {"message": "Artifact Storage API", "version": app.version, "remotes": list(config.config.get("remotes", {}).keys())}
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
@@ -75,735 +57,66 @@ def health_check():
|
||||
return {"status": "healthy"}
|
||||
|
||||
|
||||
@app.get("/config")
|
||||
def get_config():
|
||||
return config.config
|
||||
|
||||
|
||||
@app.get("/metrics")
|
||||
def get_metrics(json: bool | None = Query(False, description="Return JSON format instead of Prometheus")):
|
||||
config._check_reload()
|
||||
if json:
|
||||
return metrics.get_metrics(storage, config)
|
||||
metrics.get_metrics(storage, config)
|
||||
return PlainTextResponse(generate_latest().decode("utf-8"), media_type=CONTENT_TYPE_LATEST)
|
||||
|
||||
|
||||
@app.put("/cache/flush")
|
||||
def flush_cache(
|
||||
remote: str = Query(default=None, description="Specific remote to flush (optional)"),
|
||||
cache_type: str = Query(default="all", description="Type to flush: 'all', 'index', 'files', 'metrics'"),
|
||||
):
|
||||
"""Flush cache entries for specified remote or all remotes"""
|
||||
try:
|
||||
result = {"remote": remote, "cache_type": cache_type, "flushed": {"redis_keys": 0, "s3_objects": 0, "operations": []}}
|
||||
|
||||
# Flush Redis entries based on cache_type
|
||||
if cache_type in ["all", "index", "metrics"] and cache.available and cache.client:
|
||||
patterns = []
|
||||
|
||||
if cache_type in ["all", "index"]:
|
||||
if remote:
|
||||
patterns.append(f"index:{remote}:*")
|
||||
patterns.append(f"mutable:meta:{remote}:*")
|
||||
else:
|
||||
patterns.append("index:*")
|
||||
patterns.append("mutable:meta:*")
|
||||
|
||||
if cache_type in ["all", "metrics"]:
|
||||
if remote:
|
||||
patterns.append(f"metrics:*:{remote}")
|
||||
else:
|
||||
patterns.append("metrics:*")
|
||||
|
||||
for pattern in patterns:
|
||||
keys = cache.client.keys(pattern)
|
||||
if keys:
|
||||
cache.client.delete(*keys)
|
||||
result["flushed"]["redis_keys"] += len(keys)
|
||||
logger.info(f"Cache flush: Deleted {len(keys)} Redis keys matching '{pattern}'")
|
||||
|
||||
if result["flushed"]["redis_keys"] > 0:
|
||||
result["flushed"]["operations"].append(f"Deleted {result['flushed']['redis_keys']} Redis keys")
|
||||
|
||||
# Flush S3 objects if requested
|
||||
if cache_type in ["all", "files"]:
|
||||
try:
|
||||
# Use prefix filtering for remote-specific deletion
|
||||
list_params = {"Bucket": storage.bucket}
|
||||
if remote:
|
||||
list_params["Prefix"] = f"{remote}/"
|
||||
|
||||
response = storage.client.list_objects_v2(**list_params)
|
||||
if "Contents" in response:
|
||||
objects_to_delete = [obj["Key"] for obj in response["Contents"]]
|
||||
|
||||
for key in objects_to_delete:
|
||||
try:
|
||||
storage.client.delete_object(Bucket=storage.bucket, Key=key)
|
||||
result["flushed"]["s3_objects"] += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to delete S3 object {key}: {e}")
|
||||
|
||||
if objects_to_delete:
|
||||
scope = f" for remote '{remote}'" if remote else ""
|
||||
result["flushed"]["operations"].append(f"Deleted {len(objects_to_delete)} S3 objects{scope}")
|
||||
logger.info(f"Cache flush: Deleted {len(objects_to_delete)} S3 objects{scope}")
|
||||
|
||||
except Exception as e:
|
||||
result["flushed"]["operations"].append(f"S3 flush failed: {str(e)}")
|
||||
logger.error(f"Cache flush S3 error: {e}")
|
||||
|
||||
if not result["flushed"]["operations"]:
|
||||
result["flushed"]["operations"].append("No cache entries found to flush")
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Cache flush error: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Cache flush failed: {str(e)}")
|
||||
|
||||
|
||||
async def construct_remote_url(remote_name: str, path: str) -> str:
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
base_url = remote_config.get("base_url")
|
||||
if not base_url:
|
||||
raise HTTPException(status_code=500, detail=f"No base_url configured for remote '{remote_name}'")
|
||||
|
||||
# Handle Docker registry URLs
|
||||
if remote_config.get("package") == "docker":
|
||||
# Convert Docker paths to v2 API format
|
||||
# e.g., library/nginx/manifests/latest -> v2/library/nginx/manifests/latest
|
||||
return f"{base_url}/v2/{path}"
|
||||
|
||||
return f"{base_url}/{path}"
|
||||
|
||||
|
||||
async def check_artifact_patterns(remote_name: str, repo_path: str, file_path: str, full_path: str) -> bool:
|
||||
# Mutable files (index files) are always allowed through
|
||||
mutable_patterns = config.get_mutable_patterns(remote_name)
|
||||
if cache.is_mutable_file(file_path, mutable_patterns) or cache.is_mutable_file(full_path, mutable_patterns):
|
||||
return True
|
||||
|
||||
# Check immutable include patterns
|
||||
patterns = config.get_immutable_patterns(remote_name, repo_path)
|
||||
if not patterns:
|
||||
return True # Allow all if no patterns configured
|
||||
|
||||
pattern_matched = False
|
||||
for pattern in patterns:
|
||||
# Check both file_path and full_path to handle different pattern types
|
||||
if re.search(pattern, file_path) or re.search(pattern, full_path):
|
||||
pattern_matched = True
|
||||
break
|
||||
|
||||
if not pattern_matched:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
|
||||
# Use hierarchical path-based key
|
||||
key = storage.get_object_key(remote_name, path)
|
||||
|
||||
if storage.exists(key):
|
||||
logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})")
|
||||
return {
|
||||
"url": url,
|
||||
"cached_url": storage.get_url(key),
|
||||
"status": "already_cached",
|
||||
}
|
||||
|
||||
try:
|
||||
remote_config = config.get_remote_config(remote_name) or {}
|
||||
is_docker = remote_config.get("package") == "docker" or "/v2/" in url
|
||||
|
||||
# Prepare headers for Docker registry requests
|
||||
headers = {}
|
||||
if is_docker:
|
||||
if "/manifests/" in url:
|
||||
headers["Accept"] = (
|
||||
"application/vnd.docker.distribution.manifest.v2+json,"
|
||||
"application/vnd.oci.image.manifest.v1+json,"
|
||||
"application/vnd.oci.image.index.v1+json,"
|
||||
"application/vnd.docker.distribution.manifest.list.v2+json"
|
||||
)
|
||||
elif "/blobs/" in url:
|
||||
headers["Accept"] = "application/octet-stream"
|
||||
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.get(url, headers=headers)
|
||||
|
||||
# Handle Docker Bearer token challenge
|
||||
if response.status_code == 401 and is_docker:
|
||||
www_auth = response.headers.get("WWW-Authenticate", "")
|
||||
username = remote_config.get("username")
|
||||
password = remote_config.get("password")
|
||||
token = await get_docker_token_for_response(www_auth, username, password)
|
||||
if token:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
response = await client.get(url, headers=headers)
|
||||
|
||||
response.raise_for_status()
|
||||
|
||||
storage_path = storage.upload(key, response.content)
|
||||
|
||||
logger.info(f"Cache ADD SUCCESS: {url} (size: {len(response.content)} bytes, key: {key})")
|
||||
|
||||
return {
|
||||
"url": url,
|
||||
"cached_url": storage.get_url(key),
|
||||
"storage_path": storage_path,
|
||||
"size": len(response.content),
|
||||
"status": "cached",
|
||||
"etag": response.headers.get("ETag"),
|
||||
"last_modified": response.headers.get("Last-Modified"),
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {"url": url, "status": "error", "error": str(e)}
|
||||
|
||||
|
||||
async def _upstream_reachable(url: str) -> bool:
|
||||
"""HEAD with a short timeout. Returns False only on network/timeout errors."""
|
||||
try:
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
await client.head(url, timeout=10.0)
|
||||
return True
|
||||
except (httpx.NetworkError, httpx.TimeoutException):
|
||||
return False
|
||||
except Exception:
|
||||
return True # 4xx/5xx means backend is up
|
||||
|
||||
|
||||
async def check_upstream_changed(remote_url: str, remote_name: str, path: str) -> bool:
|
||||
"""Conditional HEAD against upstream. Returns False only on a definitive 304.
|
||||
Raises UpstreamUnreachable if the backend cannot be contacted."""
|
||||
meta = cache.get_mutable_meta(remote_name, path)
|
||||
if not meta:
|
||||
return True
|
||||
|
||||
headers = {}
|
||||
if meta.get("etag"):
|
||||
headers["If-None-Match"] = meta["etag"]
|
||||
if meta.get("last_modified"):
|
||||
headers["If-Modified-Since"] = meta["last_modified"]
|
||||
if not headers:
|
||||
return True
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.head(remote_url, headers=headers)
|
||||
return response.status_code != 304
|
||||
except (httpx.NetworkError, httpx.TimeoutException) as exc:
|
||||
raise UpstreamUnreachable(str(exc)) from exc
|
||||
|
||||
|
||||
async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) -> bool:
|
||||
"""Handle an expired mutable file. Returns True if the cached copy is still valid."""
|
||||
mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
|
||||
|
||||
remote_cfg = config.get_remote_config(remote_name) or {}
|
||||
check_updates = remote_cfg.get("check_mutable_updates", False)
|
||||
user_mutable = check_updates and cache.is_mutable_file(path, config.get_user_mutable_patterns(remote_name))
|
||||
|
||||
if user_mutable:
|
||||
try:
|
||||
changed = await check_upstream_changed(remote_url, remote_name, path)
|
||||
except UpstreamUnreachable:
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)")
|
||||
return True
|
||||
if not changed:
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)")
|
||||
return True
|
||||
logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")
|
||||
else:
|
||||
if not await _upstream_reachable(remote_url):
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)")
|
||||
return True
|
||||
logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")
|
||||
|
||||
cache.cleanup_expired_index(storage, remote_name, path)
|
||||
return False
|
||||
|
||||
|
||||
@app.get("/api/v1/remote/{remote_name}/{path:path}")
|
||||
async def get_artifact(remote_name: str, path: str):
|
||||
# Check if remote is configured
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
# Check if this is a local repository
|
||||
if remote_config.get("type") == "local":
|
||||
# Handle local repository download
|
||||
metadata = database.get_local_file_metadata(remote_name, path)
|
||||
if not metadata:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
|
||||
# Get file from S3
|
||||
content = storage.download_object(metadata["s3_key"])
|
||||
if content is None:
|
||||
raise HTTPException(status_code=500, detail="File not accessible")
|
||||
|
||||
# Determine content type
|
||||
content_type = metadata.get("content_type", "application/octet-stream")
|
||||
|
||||
return Response(
|
||||
content=content,
|
||||
media_type=content_type,
|
||||
headers={"Content-Disposition": f"attachment; filename={os.path.basename(path)}"},
|
||||
)
|
||||
|
||||
# Extract repository path for pattern checking
|
||||
path_parts = path.split("/")
|
||||
if len(path_parts) >= 2:
|
||||
repo_path = f"{path_parts[0]}/{path_parts[1]}"
|
||||
file_path = "/".join(path_parts[2:])
|
||||
else:
|
||||
repo_path = path
|
||||
file_path = path
|
||||
|
||||
# Check if artifact matches configured patterns
|
||||
if not await check_artifact_patterns(remote_name, repo_path, file_path, path):
|
||||
logger.info(f"PATTERN BLOCKED: {remote_name}/{path} - not matching include patterns")
|
||||
raise HTTPException(status_code=403, detail="Artifact not allowed by configuration patterns")
|
||||
|
||||
# Construct the remote URL
|
||||
remote_url = await construct_remote_url(remote_name, path)
|
||||
|
||||
# Check if artifact is already cached
|
||||
cached_key = storage.get_object_key(remote_name, path)
|
||||
if not storage.exists(cached_key):
|
||||
cached_key = None
|
||||
|
||||
# For mutable files, check Redis TTL validity
|
||||
filename = os.path.basename(path)
|
||||
is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name))
|
||||
|
||||
if cached_key and is_mutable:
|
||||
if not cache.is_index_valid(remote_name, path):
|
||||
if not await handle_expired_mutable(remote_name, path, remote_url):
|
||||
cached_key = None
|
||||
|
||||
if cached_key:
|
||||
# Return cached artifact
|
||||
try:
|
||||
artifact_data = storage.download_object(cached_key)
|
||||
filename = os.path.basename(path)
|
||||
|
||||
# Log cache hit
|
||||
logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})")
|
||||
|
||||
# Determine content type based on file extension
|
||||
content_type = "application/octet-stream"
|
||||
if filename.endswith(".tar.gz"):
|
||||
content_type = "application/gzip"
|
||||
elif filename.endswith(".zip"):
|
||||
content_type = "application/zip"
|
||||
elif filename.endswith(".exe"):
|
||||
content_type = "application/x-msdownload"
|
||||
elif filename.endswith(".rpm"):
|
||||
content_type = "application/x-rpm"
|
||||
elif filename.endswith(".xml"):
|
||||
content_type = "application/xml"
|
||||
elif filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
||||
content_type = "application/gzip"
|
||||
|
||||
# Record cache hit metrics
|
||||
metrics.record_cache_hit(remote_name, len(artifact_data))
|
||||
|
||||
# Record artifact mapping in database if not already recorded
|
||||
database.record_artifact_mapping(cached_key, remote_name, path, len(artifact_data))
|
||||
|
||||
return Response(
|
||||
content=artifact_data,
|
||||
media_type=content_type,
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename={filename}",
|
||||
"X-Artifact-Source": "cache",
|
||||
"X-Artifact-Size": str(len(artifact_data)),
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Error retrieving cached artifact: {str(e)}")
|
||||
|
||||
# Artifact not cached, cache it first
|
||||
logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
|
||||
result = await cache_single_artifact(remote_url, remote_name, path)
|
||||
|
||||
if result["status"] == "error":
|
||||
logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}")
|
||||
raise HTTPException(status_code=502, detail=f"Failed to fetch artifact: {result['error']}")
|
||||
|
||||
# Mark mutable files as cached in Redis with TTL
|
||||
if result["status"] == "cached" and is_mutable:
|
||||
cache_config = config.get_cache_config(remote_name)
|
||||
mutable_ttl = cache_config.get("mutable_ttl", 3600)
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
|
||||
if result.get("etag") or result.get("last_modified"):
|
||||
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
|
||||
|
||||
# Now return the cached artifact
|
||||
try:
|
||||
cache_key = storage.get_object_key(remote_name, path)
|
||||
artifact_data = storage.download_object(cache_key)
|
||||
filename = os.path.basename(path)
|
||||
|
||||
content_type = "application/octet-stream"
|
||||
if filename.endswith(".tar.gz"):
|
||||
content_type = "application/gzip"
|
||||
elif filename.endswith(".zip"):
|
||||
content_type = "application/zip"
|
||||
elif filename.endswith(".exe"):
|
||||
content_type = "application/x-msdownload"
|
||||
elif filename.endswith(".rpm"):
|
||||
content_type = "application/x-rpm"
|
||||
elif filename.endswith(".xml"):
|
||||
content_type = "application/xml"
|
||||
elif filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
||||
content_type = "application/gzip"
|
||||
|
||||
# Record cache miss metrics
|
||||
metrics.record_cache_miss(remote_name, len(artifact_data))
|
||||
|
||||
# Record artifact mapping in database
|
||||
cache_key = storage.get_object_key(remote_name, path)
|
||||
database.record_artifact_mapping(cache_key, remote_name, path, len(artifact_data))
|
||||
|
||||
return Response(
|
||||
content=artifact_data,
|
||||
media_type=content_type,
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename={filename}",
|
||||
"X-Artifact-Source": "remote",
|
||||
"X-Artifact-Size": str(len(artifact_data)),
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Error serving artifact: {str(e)}")
|
||||
return flush.handle(remote, cache_type, cache, storage)
|
||||
|
||||
|
||||
@app.get("/v2/")
|
||||
async def docker_v2_ping():
|
||||
return Response(
|
||||
content="{}",
|
||||
media_type="application/json",
|
||||
headers={"Docker-Distribution-Api-Version": "registry/2.0"},
|
||||
)
|
||||
return docker_handler.ping()
|
||||
|
||||
|
||||
@app.api_route("/v2/{remote_name}/{path:path}", methods=["GET", "HEAD"])
|
||||
async def docker_v2_proxy(request: Request, remote_name: str, path: str):
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
if remote_config.get("package") != "docker":
|
||||
raise HTTPException(status_code=400, detail=f"Remote '{remote_name}' is not a docker remote")
|
||||
|
||||
# Check immutable_patterns against the image name (e.g. "library/nginx")
|
||||
patterns = config.get_immutable_patterns(remote_name, "")
|
||||
if patterns:
|
||||
path_parts = path.split("/")
|
||||
image_name = "/".join(path_parts[:2]) if len(path_parts) >= 2 else path
|
||||
if not any(re.search(p, path) or re.search(p, image_name) for p in patterns):
|
||||
logger.info(f"PATTERN BLOCKED: {remote_name}/{path}")
|
||||
raise HTTPException(status_code=403, detail="Image not allowed by configuration patterns")
|
||||
|
||||
remote_url = await construct_remote_url(remote_name, path)
|
||||
|
||||
cached_key = storage.get_object_key(remote_name, path)
|
||||
if not storage.exists(cached_key):
|
||||
cached_key = None
|
||||
|
||||
is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name))
|
||||
|
||||
if cached_key and is_mutable:
|
||||
if not cache.is_index_valid(remote_name, path):
|
||||
if not await handle_expired_mutable(remote_name, path, remote_url):
|
||||
cached_key = None
|
||||
|
||||
if not cached_key:
|
||||
logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
|
||||
result = await cache_single_artifact(remote_url, remote_name, path)
|
||||
if result["status"] == "error":
|
||||
raise HTTPException(status_code=502, detail=f"Failed to fetch: {result['error']}")
|
||||
if result["status"] == "cached" and is_mutable:
|
||||
cache_config = config.get_cache_config(remote_name)
|
||||
mutable_ttl = cache_config.get("mutable_ttl", 3600)
|
||||
cache.mark_index_cached(remote_name, path, mutable_ttl)
|
||||
logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
|
||||
if result.get("etag") or result.get("last_modified"):
|
||||
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
|
||||
|
||||
artifact_data = storage.download_object(storage.get_object_key(remote_name, path))
|
||||
|
||||
is_blob = "/blobs/" in path
|
||||
if is_blob:
|
||||
content_type = "application/octet-stream"
|
||||
else:
|
||||
try:
|
||||
manifest_json = json.loads(artifact_data)
|
||||
content_type = manifest_json.get("mediaType")
|
||||
if not content_type:
|
||||
if "manifests" in manifest_json:
|
||||
content_type = "application/vnd.oci.image.index.v1+json"
|
||||
else:
|
||||
content_type = "application/vnd.oci.image.manifest.v1+json"
|
||||
except Exception:
|
||||
content_type = "application/vnd.oci.image.manifest.v1+json"
|
||||
|
||||
digest = f"sha256:{hashlib.sha256(artifact_data).hexdigest()}"
|
||||
headers = {
|
||||
"Docker-Distribution-Api-Version": "registry/2.0",
|
||||
"Docker-Content-Digest": digest,
|
||||
"Content-Length": str(len(artifact_data)),
|
||||
}
|
||||
|
||||
if request.method == "HEAD":
|
||||
return Response(status_code=200, headers=headers, media_type=content_type)
|
||||
|
||||
metrics.record_cache_hit(remote_name, len(artifact_data))
|
||||
return Response(content=artifact_data, media_type=content_type, headers=headers)
|
||||
return await docker_handler.proxy(request, remote_name, path, storage, cache, config, metrics)
|
||||
|
||||
|
||||
async def discover_artifacts(remote: str, include_pattern: str) -> list[str]:
|
||||
if "github.com" in remote:
|
||||
return await discover_github_releases(remote, include_pattern)
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail=f"Unsupported remote: {remote}")
|
||||
|
||||
|
||||
async def discover_github_releases(remote: str, include_pattern: str) -> list[str]:
|
||||
match = re.match(r"github\.com/([^/]+)/([^/]+)", remote)
|
||||
if not match:
|
||||
raise HTTPException(status_code=400, detail="Invalid GitHub remote format")
|
||||
|
||||
owner, repo = match.groups()
|
||||
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.get(f"https://api.github.com/repos/{owner}/{repo}/releases")
|
||||
|
||||
if response.status_code != 200:
|
||||
raise HTTPException(
|
||||
status_code=response.status_code,
|
||||
detail=f"Failed to fetch releases: {response.text}",
|
||||
)
|
||||
|
||||
releases = response.json()
|
||||
|
||||
matching_urls = []
|
||||
pattern = include_pattern.replace("*", ".*")
|
||||
regex = re.compile(pattern)
|
||||
|
||||
for release in releases:
|
||||
for asset in release.get("assets", []):
|
||||
download_url = asset["browser_download_url"]
|
||||
if regex.search(download_url):
|
||||
matching_urls.append(download_url)
|
||||
|
||||
return matching_urls
|
||||
@app.get("/api/v1/remote/{remote_name}/{path:path}")
|
||||
async def get_artifact(request: Request, remote_name: str, path: str):
|
||||
return await proxy.handle(request, remote_name, path, storage, cache, config, database, metrics)
|
||||
|
||||
|
||||
@app.put("/api/v1/remote/{remote_name}/{path:path}")
|
||||
async def upload_file(remote_name: str, path: str, file: UploadFile = File(...)):
|
||||
"""Upload a file to local repository"""
|
||||
# Check if remote is configured and is local
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
if remote_config.get("type") != "local":
|
||||
raise HTTPException(status_code=400, detail="Upload only supported for local repositories")
|
||||
|
||||
try:
|
||||
# Read file content
|
||||
content = await file.read()
|
||||
|
||||
# Calculate SHA256
|
||||
sha256_sum = hashlib.sha256(content).hexdigest()
|
||||
|
||||
# Check if file already exists (prevent overwrite)
|
||||
if database.file_exists(remote_name, path):
|
||||
raise HTTPException(status_code=409, detail="File already exists")
|
||||
|
||||
# Generate S3 key
|
||||
s3_key = f"local/{remote_name}/{path}"
|
||||
|
||||
# Determine content type
|
||||
content_type = file.content_type or "application/octet-stream"
|
||||
|
||||
# Upload to S3
|
||||
try:
|
||||
storage.upload(s3_key, content)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Upload failed: {e}")
|
||||
|
||||
# Add to database
|
||||
success = database.add_local_file(
|
||||
repository_name=remote_name,
|
||||
file_path=path,
|
||||
s3_key=s3_key,
|
||||
size_bytes=len(content),
|
||||
sha256_sum=sha256_sum,
|
||||
content_type=content_type,
|
||||
)
|
||||
|
||||
if not success:
|
||||
# Clean up S3 if database insert failed
|
||||
storage.delete_object(s3_key)
|
||||
raise HTTPException(status_code=500, detail="Failed to save file metadata")
|
||||
|
||||
return JSONResponse(
|
||||
{
|
||||
"message": "File uploaded successfully",
|
||||
"file_path": path,
|
||||
"size_bytes": len(content),
|
||||
"sha256_sum": sha256_sum,
|
||||
"content_type": content_type,
|
||||
}
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
|
||||
return await local.upload(remote_name, path, file, storage, database, config)
|
||||
|
||||
|
||||
@app.head("/api/v1/remote/{remote_name}/{path:path}")
|
||||
def check_file_exists(remote_name: str, path: str):
|
||||
"""Check if file exists (for CI jobs) - supports local repositories only"""
|
||||
# Check if remote is configured
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
# Handle local repository
|
||||
if remote_config.get("type") == "local":
|
||||
try:
|
||||
metadata = database.get_local_file_metadata(remote_name, path)
|
||||
if not metadata:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
|
||||
return Response(
|
||||
headers={
|
||||
"Content-Length": str(metadata["size_bytes"]),
|
||||
"Content-Type": metadata.get("content_type", "application/octet-stream"),
|
||||
"X-SHA256": metadata["sha256_sum"],
|
||||
"X-Created-At": metadata["created_at"].isoformat() if metadata["created_at"] else "",
|
||||
"X-Uploaded-At": metadata["uploaded_at"].isoformat() if metadata["uploaded_at"] else "",
|
||||
}
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")
|
||||
else:
|
||||
# For remote repositories, just return 405 Method Not Allowed
|
||||
raise HTTPException(status_code=405, detail="HEAD method only supported for local repositories")
|
||||
return local.check_exists(remote_name, path, database, config)
|
||||
|
||||
|
||||
@app.delete("/api/v1/remote/{remote_name}/{path:path}")
|
||||
def delete_file(remote_name: str, path: str):
|
||||
"""Delete a file from local repository"""
|
||||
# Check if remote is configured and is local
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
if remote_config.get("type") != "local":
|
||||
raise HTTPException(status_code=400, detail="Delete only supported for local repositories")
|
||||
|
||||
try:
|
||||
# Get S3 key before deleting from database
|
||||
s3_key = database.delete_local_file(remote_name, path)
|
||||
if not s3_key:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
|
||||
# Delete from S3
|
||||
if not storage.delete_object(s3_key):
|
||||
# File was deleted from database but not from S3 - log warning but continue
|
||||
print(f"Warning: Failed to delete S3 object {s3_key}")
|
||||
|
||||
return JSONResponse({"message": "File deleted successfully"})
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Delete failed: {str(e)}")
|
||||
return local.delete(remote_name, path, storage, database, config)
|
||||
|
||||
|
||||
@app.post("/api/v1/artifacts/cache")
|
||||
async def cache_artifact(request: ArtifactRequest) -> dict[str, Any]:
|
||||
try:
|
||||
matching_urls = await discover_artifacts(request.remote, request.include_pattern)
|
||||
|
||||
if not matching_urls:
|
||||
return {
|
||||
"message": "No matching artifacts found",
|
||||
"cached_count": 0,
|
||||
"artifacts": [],
|
||||
}
|
||||
|
||||
cached_artifacts = []
|
||||
|
||||
for url in matching_urls:
|
||||
result = await cache_single_artifact(url, "", "")
|
||||
cached_artifacts.append(result)
|
||||
|
||||
cached_count = sum(1 for artifact in cached_artifacts if artifact["status"] in ["cached", "already_cached"])
|
||||
|
||||
return {
|
||||
"message": f"Processed {len(matching_urls)} artifacts, {cached_count} successfully cached",
|
||||
"cached_count": cached_count,
|
||||
"artifacts": cached_artifacts,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
async def cache_artifact(request: ArtifactRequest):
|
||||
return await discovery.cache_artifacts(request.remote, request.include_pattern, storage)
|
||||
|
||||
|
||||
@app.get("/api/v1/artifacts/{remote:path}")
|
||||
async def list_cached_artifacts(remote: str, include_pattern: str = ".*") -> dict[str, Any]:
|
||||
try:
|
||||
matching_urls = await discover_artifacts(remote, include_pattern)
|
||||
|
||||
cached_artifacts = []
|
||||
for url in matching_urls:
|
||||
# Extract path from URL for hierarchical key generation
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed = urlparse(url)
|
||||
path = parsed.path
|
||||
key = storage.get_object_key(remote, path)
|
||||
if storage.exists(key):
|
||||
cached_artifacts.append({"url": url, "cached_url": storage.get_url(key), "key": key})
|
||||
|
||||
return {
|
||||
"remote": remote,
|
||||
"pattern": include_pattern,
|
||||
"total_found": len(matching_urls),
|
||||
"cached_count": len(cached_artifacts),
|
||||
"artifacts": cached_artifacts,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.get("/metrics")
|
||||
def get_metrics(
|
||||
json: bool | None = Query(False, description="Return JSON format instead of Prometheus"),
|
||||
):
|
||||
"""Get comprehensive metrics about the artifact storage system"""
|
||||
config._check_reload()
|
||||
|
||||
if json:
|
||||
# Return JSON format
|
||||
return metrics.get_metrics(storage, config)
|
||||
else:
|
||||
# Return Prometheus format
|
||||
metrics.get_metrics(storage, config) # Update gauges
|
||||
prometheus_data = generate_latest().decode("utf-8")
|
||||
return PlainTextResponse(prometheus_data, media_type=CONTENT_TYPE_LATEST)
|
||||
|
||||
|
||||
@app.get("/config")
|
||||
def get_config():
|
||||
return config.config
|
||||
async def list_cached_artifacts(remote: str, include_pattern: str = ".*"):
|
||||
return await discovery.list_artifacts(remote, include_pattern, storage)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
from . import generic, helm, npm, python, rpm
|
||||
from .base import get_content_type
|
||||
|
||||
__all__ = ["generic", "helm", "npm", "python", "rpm", "get_content_type"]
|
||||
@@ -0,0 +1,16 @@
|
||||
def get_content_type(filename: str) -> str:
|
||||
if filename.endswith((".tar.gz", ".tgz")):
|
||||
return "application/gzip"
|
||||
if filename.endswith(".zip") or filename.endswith(".whl"):
|
||||
return "application/zip"
|
||||
if filename.endswith(".exe"):
|
||||
return "application/x-msdownload"
|
||||
if filename.endswith(".rpm"):
|
||||
return "application/x-rpm"
|
||||
if filename.endswith(".xml"):
|
||||
return "application/xml"
|
||||
if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
||||
return "application/gzip"
|
||||
if filename.endswith((".yaml", ".yml")):
|
||||
return "text/yaml"
|
||||
return "application/octet-stream"
|
||||
@@ -0,0 +1,3 @@
|
||||
from .base import get_content_type
|
||||
|
||||
__all__ = ["get_content_type"]
|
||||
@@ -0,0 +1,18 @@
|
||||
from .base import get_content_type
|
||||
|
||||
|
||||
def resolve_content(
|
||||
data: bytes,
|
||||
path: str,
|
||||
filename: str,
|
||||
base_url: str,
|
||||
proxy_url: str,
|
||||
remote_name: str,
|
||||
) -> tuple[bytes, str]:
|
||||
if filename == "index.yaml":
|
||||
data = data.replace(
|
||||
base_url.encode(),
|
||||
f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
|
||||
)
|
||||
return data, "text/yaml"
|
||||
return data, get_content_type(filename)
|
||||
@@ -0,0 +1,21 @@
|
||||
import re
|
||||
|
||||
from .base import get_content_type
|
||||
|
||||
|
||||
def resolve_content(
|
||||
data: bytes,
|
||||
path: str,
|
||||
filename: str,
|
||||
immutable_patterns: list[str],
|
||||
base_url: str,
|
||||
proxy_url: str,
|
||||
remote_name: str,
|
||||
) -> tuple[bytes, str]:
|
||||
if not any(re.search(p, path) for p in immutable_patterns):
|
||||
data = data.replace(
|
||||
base_url.encode(),
|
||||
f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
|
||||
)
|
||||
return data, "application/json"
|
||||
return data, get_content_type(filename)
|
||||
@@ -0,0 +1,32 @@
|
||||
import re
|
||||
|
||||
from .base import get_content_type
|
||||
|
||||
|
||||
def construct_url(base_url: str, path: str) -> str:
|
||||
"""Build the upstream URL for a PyPI request.
|
||||
|
||||
PyPI splits simple/ index pages (pypi.org) from file downloads
|
||||
(files.pythonhosted.org), so simple/ requests are redirected to pypi.org.
|
||||
"""
|
||||
if base_url.rstrip("/") == "https://files.pythonhosted.org" and "simple/" in path:
|
||||
return f"https://pypi.org/{path}"
|
||||
return f"{base_url}/{path}"
|
||||
|
||||
|
||||
def resolve_content(
|
||||
data: bytes,
|
||||
path: str,
|
||||
filename: str,
|
||||
immutable_patterns: list[str],
|
||||
base_url: str,
|
||||
proxy_url: str,
|
||||
remote_name: str,
|
||||
) -> tuple[bytes, str]:
|
||||
if not any(re.search(p, path) for p in immutable_patterns):
|
||||
data = data.replace(
|
||||
base_url.encode(),
|
||||
f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
|
||||
)
|
||||
return data, "text/html; charset=utf-8"
|
||||
return data, get_content_type(filename)
|
||||
@@ -0,0 +1,3 @@
|
||||
from .base import get_content_type
|
||||
|
||||
__all__ = ["get_content_type"]
|
||||
@@ -0,0 +1,3 @@
|
||||
from .s3 import S3Storage
|
||||
|
||||
__all__ = ["S3Storage"]
|
||||
@@ -41,7 +41,6 @@ class S3Storage:
|
||||
|
||||
self.client = boto3.client("s3", **client_kwargs)
|
||||
|
||||
# Try to ensure bucket exists, but don't fail if MinIO isn't ready yet
|
||||
try:
|
||||
self._ensure_bucket_exists()
|
||||
except Exception as e:
|
||||
@@ -55,25 +54,21 @@ class S3Storage:
|
||||
self.client.create_bucket(Bucket=self.bucket)
|
||||
|
||||
def get_object_key(self, remote_name: str, path: str) -> str:
|
||||
# Extract directory path and filename
|
||||
clean_path = path.lstrip("/")
|
||||
filename = os.path.basename(clean_path)
|
||||
directory_path = os.path.dirname(clean_path)
|
||||
|
||||
# Special handling for Docker registry blobs (use digest as key for deduplication)
|
||||
# Docker blobs are keyed by digest for deduplication across images
|
||||
if "/blobs/sha256:" in clean_path:
|
||||
# Extract the SHA256 digest for Docker blobs
|
||||
parts = clean_path.split("/blobs/sha256:")
|
||||
if len(parts) == 2:
|
||||
digest = parts[1]
|
||||
return f"{remote_name}/blobs/sha256/{digest}"
|
||||
|
||||
# Hash the directory path to keep keys manageable while preserving remote structure
|
||||
if directory_path:
|
||||
path_hash = hashlib.sha256(directory_path.encode()).hexdigest()[:16]
|
||||
return f"{remote_name}/{path_hash}/{filename}"
|
||||
else:
|
||||
# If no directory, just use remote and filename
|
||||
return f"{remote_name}/{filename}"
|
||||
|
||||
def exists(self, key: str) -> bool:
|
||||
@@ -72,6 +72,50 @@ TEST_REMOTES = {
|
||||
"package": "generic",
|
||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 0},
|
||||
},
|
||||
"pypi-test": {
|
||||
"base_url": "https://files.pythonhosted.org",
|
||||
"type": "remote",
|
||||
"package": "pypi",
|
||||
"immutable_patterns": [
|
||||
r"packages/.*\.whl$",
|
||||
r"packages/.*\.whl\.metadata$",
|
||||
r"packages/.*\.tar\.gz$",
|
||||
],
|
||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 600},
|
||||
},
|
||||
"npm-test": {
|
||||
"base_url": "https://registry.npmjs.org",
|
||||
"type": "remote",
|
||||
"package": "npm",
|
||||
"immutable_patterns": [r"\.tgz$"],
|
||||
"mutable_patterns": [r"^(?!.*\.tgz$).*"],
|
||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 600},
|
||||
},
|
||||
"helm-test": {
|
||||
"base_url": "https://helm.releases.hashicorp.com",
|
||||
"type": "remote",
|
||||
"package": "helm",
|
||||
"immutable_patterns": [r"\.tgz$"],
|
||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
|
||||
},
|
||||
"quarantine-test": {
|
||||
"base_url": "https://releases.example.com",
|
||||
"type": "remote",
|
||||
"package": "generic",
|
||||
"immutable_patterns": [r".*\.tar\.gz$"],
|
||||
"quarantine_new": True,
|
||||
"quarantine_days": 3,
|
||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 0},
|
||||
},
|
||||
"quarantine-disabled": {
|
||||
"base_url": "https://releases.example.com",
|
||||
"type": "remote",
|
||||
"package": "generic",
|
||||
"immutable_patterns": [r".*\.tar\.gz$"],
|
||||
"quarantine_new": False,
|
||||
"quarantine_days": 3,
|
||||
"cache": {"immutable_ttl": 0, "mutable_ttl": 0},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -283,3 +283,47 @@ class TestMutableMeta:
|
||||
|
||||
def test_delete_no_op_when_unavailable(self, unavailable_cache):
|
||||
unavailable_cache.delete_mutable_meta("remote", "path") # must not raise
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# artifact published date (quarantine support)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestArtifactPublished:
|
||||
def test_key_format_is_deterministic(self, bare_cache):
|
||||
path = "some/path/package-1.0.tar.gz"
|
||||
expected_hash = hashlib.sha256(path.encode()).hexdigest()[:16]
|
||||
assert bare_cache.get_artifact_published_key("myremote", path) == f"pkg:published:myremote:{expected_hash}"
|
||||
|
||||
def test_key_hash_is_16_chars(self, bare_cache):
|
||||
key = bare_cache.get_artifact_published_key("remote", "path/to/file.whl")
|
||||
assert len(key.split(":")[-1]) == 16
|
||||
|
||||
def test_different_paths_produce_different_keys(self, bare_cache):
|
||||
k1 = bare_cache.get_artifact_published_key("remote", "pkg-1.0.tar.gz")
|
||||
k2 = bare_cache.get_artifact_published_key("remote", "pkg-2.0.tar.gz")
|
||||
assert k1 != k2
|
||||
|
||||
def test_store_calls_set_with_correct_value(self, cache_with_redis, mock_redis_client):
|
||||
lm = "Mon, 01 Jan 2024 00:00:00 GMT"
|
||||
cache_with_redis.store_artifact_published("remote", "path/pkg.tar.gz", lm)
|
||||
expected_key = cache_with_redis.get_artifact_published_key("remote", "path/pkg.tar.gz")
|
||||
mock_redis_client.set.assert_called_once_with(expected_key, lm)
|
||||
|
||||
def test_get_returns_stored_value(self, cache_with_redis, mock_redis_client):
|
||||
lm = "Tue, 15 Mar 2022 12:00:00 GMT"
|
||||
mock_redis_client.get.return_value = lm
|
||||
result = cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz")
|
||||
assert result == lm
|
||||
|
||||
def test_get_returns_none_when_not_stored(self, cache_with_redis, mock_redis_client):
|
||||
mock_redis_client.get.return_value = None
|
||||
result = cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz")
|
||||
assert result is None
|
||||
|
||||
def test_store_no_op_when_unavailable(self, unavailable_cache):
|
||||
unavailable_cache.store_artifact_published("remote", "path", "Mon, 01 Jan 2024 00:00:00 GMT")
|
||||
|
||||
def test_get_returns_none_when_unavailable(self, unavailable_cache):
|
||||
assert unavailable_cache.get_artifact_published("remote", "path") is None
|
||||
|
||||
@@ -133,6 +133,58 @@ class TestGetMutablePatterns:
|
||||
assert r"repomd\.xml$" in patterns
|
||||
assert r"custom-meta\.xml$" in patterns
|
||||
|
||||
def test_npm_has_no_package_defaults(self, make_config):
|
||||
cfg = make_config({"r": {"type": "remote", "package": "npm", "base_url": "https://x.com"}})
|
||||
assert cfg.get_mutable_patterns("r") == []
|
||||
|
||||
def test_npm_explicit_mutable_pattern_matches_metadata(self, make_config):
|
||||
import re
|
||||
|
||||
cfg = make_config(
|
||||
{
|
||||
"r": {
|
||||
"type": "remote",
|
||||
"package": "npm",
|
||||
"base_url": "https://x.com",
|
||||
"mutable_patterns": [r"^(?!.*\.tgz$).*"],
|
||||
}
|
||||
}
|
||||
)
|
||||
patterns = cfg.get_mutable_patterns("r")
|
||||
assert any(re.search(p, "express") for p in patterns)
|
||||
assert any(re.search(p, "@babel/core") for p in patterns)
|
||||
|
||||
def test_helm_returns_index_yaml_as_mutable(self, make_config):
|
||||
cfg = make_config({"r": {"type": "remote", "package": "helm", "base_url": "https://helm.example.com"}})
|
||||
patterns = cfg.get_mutable_patterns("r")
|
||||
assert r"index\.yaml$" in patterns
|
||||
|
||||
def test_helm_chart_tarballs_not_mutable_by_default(self, make_config):
|
||||
import re
|
||||
|
||||
cfg = make_config({"r": {"type": "remote", "package": "helm", "base_url": "https://helm.example.com"}})
|
||||
patterns = cfg.get_mutable_patterns("r")
|
||||
# Only index.yaml is mutable; .tgz chart tarballs are not
|
||||
assert not any(re.search(p, "vault-0.29.1.tgz") for p in patterns)
|
||||
assert not any(re.search(p, "consul-1.5.0.tgz") for p in patterns)
|
||||
|
||||
def test_npm_explicit_mutable_pattern_excludes_tarballs(self, make_config):
|
||||
import re
|
||||
|
||||
cfg = make_config(
|
||||
{
|
||||
"r": {
|
||||
"type": "remote",
|
||||
"package": "npm",
|
||||
"base_url": "https://x.com",
|
||||
"mutable_patterns": [r"^(?!.*\.tgz$).*"],
|
||||
}
|
||||
}
|
||||
)
|
||||
patterns = cfg.get_mutable_patterns("r")
|
||||
assert not any(re.search(p, "express-4.18.2.tgz") for p in patterns)
|
||||
assert not any(re.search(p, "express/-/express-4.18.2.tgz") for p in patterns)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_immutable_patterns
|
||||
@@ -299,3 +351,190 @@ class TestConfigReload:
|
||||
cfg._check_reload()
|
||||
|
||||
assert "repo-a" in cfg.config["remotes"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_quarantine_config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetQuarantineConfig:
|
||||
def test_returns_false_zero_when_not_configured(self, make_config):
|
||||
cfg = make_config({"r": {"type": "remote", "package": "generic", "base_url": "https://x.com"}})
|
||||
enabled, days = cfg.get_quarantine_config("r")
|
||||
assert enabled is False
|
||||
assert days == 0
|
||||
|
||||
def test_returns_false_zero_for_missing_remote(self, make_config):
|
||||
cfg = make_config({})
|
||||
enabled, days = cfg.get_quarantine_config("nonexistent")
|
||||
assert enabled is False
|
||||
assert days == 0
|
||||
|
||||
def test_enabled_true_and_days_returned(self, make_config):
|
||||
cfg = make_config(
|
||||
{
|
||||
"r": {
|
||||
"type": "remote",
|
||||
"package": "generic",
|
||||
"base_url": "https://x.com",
|
||||
"quarantine_new": True,
|
||||
"quarantine_days": 7,
|
||||
}
|
||||
}
|
||||
)
|
||||
enabled, days = cfg.get_quarantine_config("r")
|
||||
assert enabled is True
|
||||
assert days == 7
|
||||
|
||||
def test_quarantine_new_false_returns_disabled(self, make_config):
|
||||
cfg = make_config(
|
||||
{
|
||||
"r": {
|
||||
"type": "remote",
|
||||
"package": "generic",
|
||||
"base_url": "https://x.com",
|
||||
"quarantine_new": False,
|
||||
"quarantine_days": 7,
|
||||
}
|
||||
}
|
||||
)
|
||||
enabled, days = cfg.get_quarantine_config("r")
|
||||
assert enabled is False
|
||||
assert days == 7
|
||||
|
||||
def test_enabled_with_zero_days_returns_zero(self, make_config):
|
||||
cfg = make_config(
|
||||
{
|
||||
"r": {
|
||||
"type": "remote",
|
||||
"package": "generic",
|
||||
"base_url": "https://x.com",
|
||||
"quarantine_new": True,
|
||||
"quarantine_days": 0,
|
||||
}
|
||||
}
|
||||
)
|
||||
enabled, days = cfg.get_quarantine_config("r")
|
||||
assert enabled is True
|
||||
assert days == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Directory mode (CONFIG_PATH points to a directory)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _remote(base_url: str = "https://x.com") -> dict:
|
||||
return {"type": "remote", "package": "generic", "base_url": base_url}
|
||||
|
||||
|
||||
class TestConfigDirMode:
|
||||
def test_loads_all_yaml_files(self, tmp_path):
|
||||
(tmp_path / "a.yaml").write_text(yaml.dump({"remotes": {"repo-a": _remote()}}))
|
||||
(tmp_path / "b.yaml").write_text(yaml.dump({"remotes": {"repo-b": _remote("https://y.com")}}))
|
||||
cfg = ConfigManager(str(tmp_path))
|
||||
assert "repo-a" in cfg.config["remotes"]
|
||||
assert "repo-b" in cfg.config["remotes"]
|
||||
|
||||
def test_later_file_overrides_earlier_on_same_key(self, tmp_path):
|
||||
(tmp_path / "a.yaml").write_text(yaml.dump({"remotes": {"r": _remote("https://first.com")}}))
|
||||
(tmp_path / "b.yaml").write_text(yaml.dump({"remotes": {"r": _remote("https://second.com")}}))
|
||||
cfg = ConfigManager(str(tmp_path))
|
||||
assert cfg.config["remotes"]["r"]["base_url"] == "https://second.com"
|
||||
|
||||
def test_empty_directory_returns_empty_remotes(self, tmp_path):
|
||||
cfg = ConfigManager(str(tmp_path))
|
||||
assert cfg.config == {"remotes": {}}
|
||||
|
||||
def test_ignores_non_yaml_files(self, tmp_path):
|
||||
(tmp_path / "notes.txt").write_text("not yaml")
|
||||
(tmp_path / "a.yaml").write_text(yaml.dump({"remotes": {"repo-a": _remote()}}))
|
||||
cfg = ConfigManager(str(tmp_path))
|
||||
assert list(cfg.config["remotes"].keys()) == ["repo-a"]
|
||||
|
||||
def test_reload_picks_up_new_file(self, tmp_path):
|
||||
(tmp_path / "a.yaml").write_text(yaml.dump({"remotes": {"repo-a": _remote()}}))
|
||||
cfg = ConfigManager(str(tmp_path))
|
||||
assert "repo-a" in cfg.config["remotes"]
|
||||
assert "repo-b" not in cfg.config["remotes"]
|
||||
|
||||
new_file = tmp_path / "b.yaml"
|
||||
new_file.write_text(yaml.dump({"remotes": {"repo-b": _remote("https://y.com")}}))
|
||||
future_mtime = cfg._last_modified + 1
|
||||
os.utime(str(new_file), (future_mtime, future_mtime))
|
||||
|
||||
cfg._check_reload()
|
||||
|
||||
assert "repo-a" in cfg.config["remotes"]
|
||||
assert "repo-b" in cfg.config["remotes"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# config_dir key (main file contains a config_dir pointer)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestConfigDirKey:
|
||||
def test_merges_remotes_from_config_dir(self, tmp_path):
|
||||
conf_d = tmp_path / "conf.d"
|
||||
conf_d.mkdir()
|
||||
(conf_d / "remotes.yaml").write_text(yaml.dump({"remotes": {"repo-extra": _remote("https://extra.com")}}))
|
||||
main = tmp_path / "config.yaml"
|
||||
main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {"repo-main": _remote()}}))
|
||||
cfg = ConfigManager(str(main))
|
||||
assert "repo-main" in cfg.config["remotes"]
|
||||
assert "repo-extra" in cfg.config["remotes"]
|
||||
|
||||
def test_relative_config_dir_resolved_from_main_file(self, tmp_path):
|
||||
conf_d = tmp_path / "conf.d"
|
||||
conf_d.mkdir()
|
||||
(conf_d / "r.yaml").write_text(yaml.dump({"remotes": {"repo-a": _remote()}}))
|
||||
main = tmp_path / "config.yaml"
|
||||
main.write_text(yaml.dump({"config_dir": "conf.d", "remotes": {}}))
|
||||
cfg = ConfigManager(str(main))
|
||||
assert "repo-a" in cfg.config["remotes"]
|
||||
|
||||
def test_config_dir_key_not_present_in_loaded_config(self, tmp_path):
|
||||
conf_d = tmp_path / "conf.d"
|
||||
conf_d.mkdir()
|
||||
main = tmp_path / "config.yaml"
|
||||
main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {}}))
|
||||
cfg = ConfigManager(str(main))
|
||||
assert "config_dir" not in cfg.config
|
||||
|
||||
def test_dir_remote_overrides_main_file_remote(self, tmp_path):
|
||||
conf_d = tmp_path / "conf.d"
|
||||
conf_d.mkdir()
|
||||
(conf_d / "override.yaml").write_text(yaml.dump({"remotes": {"r": _remote("https://new.com")}}))
|
||||
main = tmp_path / "config.yaml"
|
||||
main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {"r": _remote("https://old.com")}}))
|
||||
cfg = ConfigManager(str(main))
|
||||
assert cfg.config["remotes"]["r"]["base_url"] == "https://new.com"
|
||||
|
||||
def test_empty_config_dir_uses_main_file_only(self, tmp_path):
|
||||
conf_d = tmp_path / "conf.d"
|
||||
conf_d.mkdir()
|
||||
main = tmp_path / "config.yaml"
|
||||
main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {"repo-main": _remote()}}))
|
||||
cfg = ConfigManager(str(main))
|
||||
assert list(cfg.config["remotes"].keys()) == ["repo-main"]
|
||||
|
||||
def test_reload_picks_up_changed_dir_file(self, tmp_path):
|
||||
conf_d = tmp_path / "conf.d"
|
||||
conf_d.mkdir()
|
||||
dir_file = conf_d / "r.yaml"
|
||||
dir_file.write_text(yaml.dump({"remotes": {"repo-v1": _remote()}}))
|
||||
main = tmp_path / "config.yaml"
|
||||
main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {}}))
|
||||
cfg = ConfigManager(str(main))
|
||||
assert "repo-v1" in cfg.config["remotes"]
|
||||
|
||||
dir_file.write_text(yaml.dump({"remotes": {"repo-v2": _remote("https://v2.com")}}))
|
||||
future_mtime = cfg._last_modified + 1
|
||||
os.utime(str(dir_file), (future_mtime, future_mtime))
|
||||
|
||||
cfg._check_reload()
|
||||
|
||||
assert "repo-v2" in cfg.config["remotes"]
|
||||
assert "repo-v1" not in cfg.config["remotes"]
|
||||
|
||||
+441
-18
@@ -2,6 +2,7 @@
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import UTC
|
||||
from unittest.mock import ANY, AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
@@ -204,7 +205,7 @@ class TestDockerProxy:
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
|
||||
with patch(
|
||||
"artifactapi.main.cache_single_artifact",
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached"},
|
||||
) as mock_fetch:
|
||||
@@ -226,7 +227,7 @@ class TestDockerProxy:
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
|
||||
with patch(
|
||||
"artifactapi.main.cache_single_artifact",
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached"},
|
||||
):
|
||||
@@ -248,9 +249,9 @@ class TestDockerProxy:
|
||||
deps["cache"].is_index_valid.return_value = False # but TTL expired
|
||||
deps["storage"].download_object.return_value = manifest
|
||||
|
||||
with patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=True):
|
||||
with patch("artifactapi.artifact.proxy._upstream_reachable", new_callable=AsyncMock, return_value=True):
|
||||
with patch(
|
||||
"artifactapi.main.cache_single_artifact",
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached"},
|
||||
) as mock_fetch:
|
||||
@@ -352,7 +353,7 @@ class TestGenericArtifactRoute:
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
with patch(
|
||||
"artifactapi.main.cache_single_artifact",
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached"},
|
||||
) as mock_fetch:
|
||||
@@ -369,7 +370,7 @@ class TestGenericArtifactRoute:
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
with patch(
|
||||
"artifactapi.main.cache_single_artifact",
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached"},
|
||||
):
|
||||
@@ -384,7 +385,7 @@ class TestGenericArtifactRoute:
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
|
||||
with patch(
|
||||
"artifactapi.main.cache_single_artifact",
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached"},
|
||||
):
|
||||
@@ -399,7 +400,7 @@ class TestGenericArtifactRoute:
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
with patch(
|
||||
"artifactapi.main.cache_single_artifact",
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "error", "error": "upstream unreachable"},
|
||||
):
|
||||
@@ -430,7 +431,7 @@ class TestGenericArtifactRoute:
|
||||
deps["cache"].is_index_valid.return_value = False
|
||||
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
||||
|
||||
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=False):
|
||||
with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock, return_value=False):
|
||||
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
||||
|
||||
assert response.status_code == 200
|
||||
@@ -446,8 +447,8 @@ class TestGenericArtifactRoute:
|
||||
deps["cache"].is_index_valid.return_value = False
|
||||
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
||||
|
||||
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=True):
|
||||
with patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
||||
with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock, return_value=True):
|
||||
with patch("artifactapi.artifact.proxy.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
||||
mock_cache.return_value = {"status": "error", "error": "upstream gone"}
|
||||
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
||||
|
||||
@@ -462,8 +463,8 @@ class TestGenericArtifactRoute:
|
||||
deps["cache"].is_index_valid.return_value = False
|
||||
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
||||
|
||||
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=True):
|
||||
with patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
||||
with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock, return_value=True):
|
||||
with patch("artifactapi.artifact.proxy.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
||||
mock_cache.return_value = {"status": "cached", "etag": '"def"', "last_modified": None}
|
||||
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
||||
|
||||
@@ -472,7 +473,7 @@ class TestGenericArtifactRoute:
|
||||
|
||||
def test_mutable_backend_unreachable_on_check_updates_keeps_stale(self, client, patched_deps):
|
||||
"""When check_mutable_updates=True and backend is unreachable, stale copy is kept and TTL refreshed."""
|
||||
from artifactapi.main import UpstreamUnreachable
|
||||
from artifactapi.artifact.proxy import UpstreamUnreachable
|
||||
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
@@ -481,7 +482,7 @@ class TestGenericArtifactRoute:
|
||||
deps["cache"].is_index_valid.return_value = False
|
||||
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
|
||||
|
||||
with patch("artifactapi.main.check_upstream_changed", side_effect=UpstreamUnreachable("connection refused")):
|
||||
with patch("artifactapi.artifact.proxy.check_upstream_changed", side_effect=UpstreamUnreachable("connection refused")):
|
||||
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
|
||||
|
||||
assert response.status_code == 200
|
||||
@@ -496,7 +497,7 @@ class TestGenericArtifactRoute:
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = False
|
||||
|
||||
with patch("artifactapi.main._upstream_reachable", new_callable=AsyncMock, return_value=False):
|
||||
with patch("artifactapi.artifact.proxy._upstream_reachable", new_callable=AsyncMock, return_value=False):
|
||||
response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz")
|
||||
|
||||
assert response.status_code == 200
|
||||
@@ -510,8 +511,8 @@ class TestGenericArtifactRoute:
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = False
|
||||
|
||||
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock) as mock_check:
|
||||
with patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
||||
with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock) as mock_check:
|
||||
with patch("artifactapi.artifact.proxy.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
|
||||
mock_cache.return_value = {"status": "error", "error": "upstream gone"}
|
||||
client.get("/api/v1/remote/custom-index-test/metadata.json")
|
||||
|
||||
@@ -652,3 +653,425 @@ class TestConfigEndpoint:
|
||||
data = response.json()
|
||||
assert "remotes" in data
|
||||
assert "alpine-test" in data["remotes"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PyPI remote /api/v1/remote/pypi-test/...
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPyPIRemote:
|
||||
def test_simple_index_is_mutable(self, client, patched_deps):
|
||||
"""simple/ paths are detected as mutable (package-type default)."""
|
||||
deps = patched_deps
|
||||
html = b"<html><body><a href='https://files.pythonhosted.org/packages/requests-2.31.0.tar.gz'>...</a></body></html>"
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = html
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/pypi-test/simple/requests/")
|
||||
assert response.status_code == 200
|
||||
deps["cache"].mark_index_cached.assert_not_called()
|
||||
|
||||
def test_simple_index_urls_rewritten_to_proxy(self, client, patched_deps):
|
||||
"""files.pythonhosted.org URLs in a cached simple index are rewritten to our proxy."""
|
||||
deps = patched_deps
|
||||
html = b"<html><body><a href='https://files.pythonhosted.org/packages/requests-2.31.0.tar.gz'>...</a></body></html>"
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = html
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/pypi-test/simple/requests/")
|
||||
assert response.status_code == 200
|
||||
assert b"files.pythonhosted.org" not in response.content
|
||||
assert b"/api/v1/remote/pypi-test/packages/requests-2.31.0.tar.gz" in response.content
|
||||
|
||||
def test_simple_index_content_type_is_html(self, client, patched_deps):
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"<html></html>"
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/pypi-test/simple/requests/")
|
||||
assert response.status_code == 200
|
||||
assert "text/html" in response.headers["content-type"]
|
||||
|
||||
def test_simple_index_cache_miss_fetches_upstream(self, client, patched_deps):
|
||||
deps = patched_deps
|
||||
html = b"<html><body><a href='https://files.pythonhosted.org/packages/p-1.0.whl'>...</a></body></html>"
|
||||
deps["storage"].exists.return_value = False
|
||||
deps["storage"].download_object.return_value = html
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
|
||||
with patch(
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached"},
|
||||
) as mock_fetch:
|
||||
response = client.get("/api/v1/remote/pypi-test/simple/requests/")
|
||||
|
||||
mock_fetch.assert_called_once()
|
||||
assert response.status_code == 200
|
||||
assert b"files.pythonhosted.org" not in response.content
|
||||
|
||||
def test_wheel_file_immutable_returns_correct_content_type(self, client, patched_deps):
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"PK wheel bytes"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
response = client.get("/api/v1/remote/pypi-test/packages/requests-2.31.0-py3-none-any.whl")
|
||||
assert response.status_code == 200
|
||||
assert "application/zip" in response.headers["content-type"]
|
||||
assert response.headers["X-Artifact-Source"] == "cache"
|
||||
|
||||
def test_sdist_immutable_returns_correct_content_type(self, client, patched_deps):
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"tar bytes"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
response = client.get("/api/v1/remote/pypi-test/packages/requests-2.31.0.tar.gz")
|
||||
assert response.status_code == 200
|
||||
assert "application/gzip" in response.headers["content-type"]
|
||||
|
||||
def test_unknown_extension_on_pypi_remote_returns_403(self, client, patched_deps):
|
||||
"""Paths that don't match immutable_patterns and aren't mutable are blocked."""
|
||||
response = client.get("/api/v1/remote/pypi-test/packages/requests.unknown")
|
||||
assert response.status_code == 403
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# npm remote /api/v1/remote/npm-test/...
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNpmRemote:
|
||||
def test_package_metadata_is_mutable(self, client, patched_deps):
|
||||
"""Top-level package metadata paths are detected as mutable."""
|
||||
deps = patched_deps
|
||||
meta = b'{"name":"express","versions":{}}'
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = meta
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/express")
|
||||
assert response.status_code == 200
|
||||
deps["cache"].mark_index_cached.assert_not_called()
|
||||
|
||||
def test_metadata_tarball_urls_rewritten_to_proxy(self, client, patched_deps):
|
||||
"""registry.npmjs.org tarball URLs in metadata JSON are rewritten to our proxy."""
|
||||
deps = patched_deps
|
||||
meta = b'{"dist":{"tarball":"https://registry.npmjs.org/express/-/express-4.18.2.tgz"}}'
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = meta
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/express")
|
||||
assert response.status_code == 200
|
||||
assert b"registry.npmjs.org" not in response.content
|
||||
assert b"/api/v1/remote/npm-test/express/-/express-4.18.2.tgz" in response.content
|
||||
|
||||
def test_metadata_content_type_is_json(self, client, patched_deps):
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b'{"name":"express"}'
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/express")
|
||||
assert response.status_code == 200
|
||||
assert "application/json" in response.headers["content-type"]
|
||||
|
||||
def test_scoped_package_metadata_rewritten(self, client, patched_deps):
|
||||
"""@scope/package metadata URLs are also rewritten back to the same npm-test remote."""
|
||||
deps = patched_deps
|
||||
meta = b'{"dist":{"tarball":"https://registry.npmjs.org/@babel/core/-/core-7.21.0.tgz"}}'
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = meta
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/@babel/core")
|
||||
assert response.status_code == 200
|
||||
assert b"registry.npmjs.org" not in response.content
|
||||
assert b"/api/v1/remote/npm-test/@babel/core/-/core-7.21.0.tgz" in response.content
|
||||
|
||||
def test_tarball_not_rewritten(self, client, patched_deps):
|
||||
"""Tarball requests (.tgz) bypass URL rewriting and return binary."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"\x1f\x8b tgz bytes"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/express/-/express-4.18.2.tgz")
|
||||
assert response.status_code == 200
|
||||
assert "application/gzip" in response.headers["content-type"]
|
||||
assert response.headers["X-Artifact-Source"] == "cache"
|
||||
|
||||
def test_metadata_cache_miss_fetches_upstream(self, client, patched_deps):
|
||||
deps = patched_deps
|
||||
meta = b'{"dist":{"tarball":"https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz"}}'
|
||||
deps["storage"].exists.return_value = False
|
||||
deps["storage"].download_object.return_value = meta
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
|
||||
with patch(
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached"},
|
||||
) as mock_fetch:
|
||||
response = client.get("/api/v1/remote/npm-test/lodash")
|
||||
|
||||
mock_fetch.assert_called_once()
|
||||
assert response.status_code == 200
|
||||
assert b"registry.npmjs.org" not in response.content
|
||||
|
||||
def test_tarball_immutable_allowed_on_npm_remote(self, client, patched_deps):
|
||||
"""Tarballs (.tgz) match immutable_patterns and are served without rewriting."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"tgz bytes"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
response = client.get("/api/v1/remote/npm-test/express/-/express-4.18.2.tgz")
|
||||
assert response.status_code == 200
|
||||
assert "application/gzip" in response.headers["content-type"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helm remote /api/v1/remote/helm-test/...
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHelmRemote:
|
||||
def test_index_yaml_is_mutable(self, client, patched_deps):
|
||||
"""index.yaml is detected as mutable (package-type default)."""
|
||||
deps = patched_deps
|
||||
index = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n"
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = index
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/helm-test/index.yaml")
|
||||
assert response.status_code == 200
|
||||
deps["cache"].mark_index_cached.assert_not_called()
|
||||
|
||||
def test_index_yaml_urls_rewritten_to_proxy(self, client, patched_deps):
|
||||
"""base_url chart URLs in a cached index.yaml are rewritten to our proxy."""
|
||||
deps = patched_deps
|
||||
index = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n"
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = index
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/helm-test/index.yaml")
|
||||
assert response.status_code == 200
|
||||
assert b"helm.releases.hashicorp.com" not in response.content
|
||||
assert b"/api/v1/remote/helm-test/vault-0.29.1.tgz" in response.content
|
||||
|
||||
def test_index_yaml_content_type_is_yaml(self, client, patched_deps):
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"apiVersion: v1\nentries: {}\n"
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
deps["cache"].is_index_valid.return_value = True
|
||||
|
||||
response = client.get("/api/v1/remote/helm-test/index.yaml")
|
||||
assert response.status_code == 200
|
||||
assert "text/yaml" in response.headers["content-type"]
|
||||
|
||||
def test_chart_tarball_immutable_returns_gzip_content_type(self, client, patched_deps):
|
||||
"""Versioned chart tarballs match immutable_patterns and are served as binary."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"\x1f\x8b chart bytes"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
response = client.get("/api/v1/remote/helm-test/vault-0.29.1.tgz")
|
||||
assert response.status_code == 200
|
||||
assert "application/gzip" in response.headers["content-type"]
|
||||
assert response.headers["X-Artifact-Source"] == "cache"
|
||||
|
||||
def test_index_yaml_cache_miss_fetches_upstream(self, client, patched_deps):
|
||||
deps = patched_deps
|
||||
index = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n"
|
||||
deps["storage"].exists.return_value = False
|
||||
deps["storage"].download_object.return_value = index
|
||||
deps["cache"].is_mutable_file.return_value = True
|
||||
|
||||
with patch(
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached"},
|
||||
) as mock_fetch:
|
||||
response = client.get("/api/v1/remote/helm-test/index.yaml")
|
||||
|
||||
mock_fetch.assert_called_once()
|
||||
assert response.status_code == 200
|
||||
assert b"helm.releases.hashicorp.com" not in response.content
|
||||
|
||||
def test_non_tgz_non_yaml_path_blocked_by_pattern(self, client, patched_deps):
|
||||
"""Paths that don't match immutable_patterns and aren't mutable are blocked."""
|
||||
deps = patched_deps
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
response = client.get("/api/v1/remote/helm-test/vault.zip")
|
||||
assert response.status_code == 403
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Quarantine (quarantine-test remote: quarantine_new=True, quarantine_days=3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestQuarantine:
|
||||
def _recent_date(self, days_ago=1):
|
||||
"""Return an HTTP-format date string N days in the past (within quarantine window)."""
|
||||
from datetime import datetime, timedelta
|
||||
from email.utils import format_datetime
|
||||
|
||||
dt = datetime.now(UTC) - timedelta(days=days_ago)
|
||||
return format_datetime(dt, usegmt=True)
|
||||
|
||||
def _old_date(self, days_ago=10):
|
||||
"""Return an HTTP-format date string N days in the past (outside quarantine window)."""
|
||||
from datetime import datetime, timedelta
|
||||
from email.utils import format_datetime
|
||||
|
||||
dt = datetime.now(UTC) - timedelta(days=days_ago)
|
||||
return format_datetime(dt, usegmt=True)
|
||||
|
||||
def test_cache_miss_recent_artifact_quarantined(self, client, patched_deps):
|
||||
"""Cache miss: artifact published within quarantine window → 404."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = False
|
||||
deps["storage"].download_object.return_value = b"content"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
with patch(
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached", "last_modified": self._recent_date()},
|
||||
):
|
||||
response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz")
|
||||
|
||||
assert response.status_code == 404
|
||||
assert "quarantined" in response.json()["detail"].lower()
|
||||
|
||||
def test_cache_miss_old_artifact_allowed(self, client, patched_deps):
|
||||
"""Cache miss: artifact published outside quarantine window → 200."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = False
|
||||
deps["storage"].download_object.return_value = b"content"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
with patch(
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached", "last_modified": self._old_date()},
|
||||
):
|
||||
response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz")
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
def test_cache_miss_no_last_modified_fails_open(self, client, patched_deps):
|
||||
"""Cache miss: no Last-Modified header → fail open (200, not quarantined)."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = False
|
||||
deps["storage"].download_object.return_value = b"content"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
with patch(
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached", "last_modified": None},
|
||||
):
|
||||
response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz")
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
def test_cache_hit_recent_artifact_quarantined(self, client, patched_deps):
|
||||
"""Cache hit: stored publish date within quarantine window → 404."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"content"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
deps["cache"].get_artifact_published.return_value = self._recent_date()
|
||||
|
||||
response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz")
|
||||
|
||||
assert response.status_code == 404
|
||||
assert "quarantined" in response.json()["detail"].lower()
|
||||
|
||||
def test_cache_hit_old_artifact_allowed(self, client, patched_deps):
|
||||
"""Cache hit: stored publish date outside quarantine window → 200."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"content"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
deps["cache"].get_artifact_published.return_value = self._old_date()
|
||||
|
||||
response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz")
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
def test_cache_hit_no_stored_date_fetches_upstream(self, client, patched_deps):
|
||||
"""Cache hit: no stored date → HEAD upstream to get Last-Modified."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = True
|
||||
deps["storage"].download_object.return_value = b"content"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
deps["cache"].get_artifact_published.return_value = None
|
||||
|
||||
with patch(
|
||||
"artifactapi.artifact.proxy._fetch_last_modified",
|
||||
new_callable=AsyncMock,
|
||||
return_value=self._old_date(),
|
||||
) as mock_fetch:
|
||||
response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz")
|
||||
|
||||
mock_fetch.assert_called_once()
|
||||
assert response.status_code == 200
|
||||
|
||||
def test_quarantine_disabled_allows_recent_artifact(self, client, patched_deps):
|
||||
"""quarantine_new=False: recent artifacts are not blocked."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = False
|
||||
deps["storage"].download_object.return_value = b"content"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
with patch(
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached", "last_modified": self._recent_date()},
|
||||
):
|
||||
response = client.get("/api/v1/remote/quarantine-disabled/some/path/package-1.0.tar.gz")
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
def test_quarantine_detail_includes_available_date(self, client, patched_deps):
|
||||
"""The 404 detail should include the date when the artifact becomes available."""
|
||||
deps = patched_deps
|
||||
deps["storage"].exists.return_value = False
|
||||
deps["storage"].download_object.return_value = b"content"
|
||||
deps["cache"].is_mutable_file.return_value = False
|
||||
|
||||
with patch(
|
||||
"artifactapi.artifact.proxy.cache_single_artifact",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"status": "cached", "last_modified": self._recent_date()},
|
||||
):
|
||||
response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz")
|
||||
|
||||
assert response.status_code == 404
|
||||
detail = response.json()["detail"]
|
||||
assert "available after" in detail
|
||||
assert "3-day" in detail
|
||||
|
||||
Reference in New Issue
Block a user