feat: quarantine new releases to prevent supply chain attacks
Add per-remote quarantine support: when quarantine_new=true and quarantine_days=N, immutable artifacts published within the last N days are blocked with a 404 until the quarantine window expires.
- ConfigManager.get_quarantine_config() reads quarantine_new/quarantine_days
- RedisCache.store_artifact_published() and RedisCache.get_artifact_published() persist the Last-Modified value per artifact
- proxy._check_quarantine() enforces the window; it fails open when the publish date is unknown
- proxy._fetch_last_modified() sends a HEAD request upstream to discover the publish date
- Docker proxy route wires quarantine checks on both the cache-hit and cache-miss paths
- remotes.yaml: quarantine_new/quarantine_days added to the pypi example (3-day window)
- README: documents the quarantine configuration
This commit is contained in:
@@ -59,6 +59,18 @@ async def proxy(request: Request, remote_name: str, path: str, storage, cache, c
|
||||
logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
|
||||
if result.get("etag") or result.get("last_modified"):
|
||||
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
|
||||
if not is_mutable:
|
||||
published = result.get("last_modified")
|
||||
if published:
|
||||
cache.store_artifact_published(remote_name, path, published)
|
||||
_proxy._check_quarantine(remote_name, published, config)
|
||||
elif not is_mutable:
|
||||
published = cache.get_artifact_published(remote_name, path)
|
||||
if not published:
|
||||
published = await _proxy._fetch_last_modified(remote_url, remote_config)
|
||||
if published:
|
||||
cache.store_artifact_published(remote_name, path, published)
|
||||
_proxy._check_quarantine(remote_name, published, config)
|
||||
|
||||
artifact_data = storage.download_object(storage.get_object_key(remote_name, path))
|
||||
|
||||
|
||||
@@ -2,6 +2,8 @@ import base64
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
import httpx
|
||||
from fastapi import HTTPException, Request, Response
|
||||
@@ -19,6 +21,42 @@ class UpstreamUnreachable(Exception):
|
||||
"""Raised when the upstream backend cannot be contacted (network or timeout error)."""
|
||||
|
||||
|
||||
def _check_quarantine(remote_name: str, last_modified_str: str | None, config) -> None:
|
||||
"""Raise HTTP 404 if the artifact is within the per-remote quarantine window.
|
||||
|
||||
Fails open (allows the request) when the publish date cannot be determined.
|
||||
"""
|
||||
enabled, days = config.get_quarantine_config(remote_name)
|
||||
if not enabled or not days:
|
||||
return
|
||||
if not last_modified_str:
|
||||
return # cannot determine age → allow
|
||||
try:
|
||||
publish_date = parsedate_to_datetime(last_modified_str)
|
||||
except Exception:
|
||||
return # unparseable → allow
|
||||
cutoff = datetime.now(UTC) - timedelta(days=days)
|
||||
if publish_date > cutoff:
|
||||
available_on = (publish_date + timedelta(days=days)).date()
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=(
|
||||
f"Package quarantined: published {publish_date.date()}, available after {available_on} ({days}-day new-release quarantine)"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
async def _fetch_last_modified(remote_url: str, remote_cfg: dict) -> str | None:
    """Issue a HEAD request upstream and report its Last-Modified header.

    Redirects are followed and the request times out after 10 seconds.
    Returns ``None`` when the header is absent or when the request fails
    for any reason.
    """
    request_headers = _basic_auth_header(remote_cfg)
    try:
        async with httpx.AsyncClient(follow_redirects=True) as http:
            upstream = await http.head(remote_url, headers=request_headers, timeout=10.0)
            last_modified = upstream.headers.get("Last-Modified")
    except Exception:
        # Best-effort lookup: callers treat None as "publish date unknown".
        return None
    return last_modified
|
||||
|
||||
|
||||
def _basic_auth_header(remote_cfg: dict) -> dict[str, str]:
|
||||
username = remote_cfg.get("username")
|
||||
password = remote_cfg.get("password")
|
||||
@@ -225,6 +263,14 @@ async def handle(request: Request, remote_name: str, path: str, storage, cache,
|
||||
cached_key = None
|
||||
|
||||
if cached_key:
|
||||
if not is_mutable:
|
||||
published = cache.get_artifact_published(remote_name, path)
|
||||
if not published:
|
||||
published = await _fetch_last_modified(remote_url, remote_config)
|
||||
if published:
|
||||
cache.store_artifact_published(remote_name, path, published)
|
||||
_check_quarantine(remote_name, published, config)
|
||||
|
||||
try:
|
||||
artifact_data = storage.download_object(cached_key)
|
||||
artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
|
||||
@@ -240,6 +286,8 @@ async def handle(request: Request, remote_name: str, path: str, storage, cache,
|
||||
"X-Artifact-Size": str(len(artifact_data)),
|
||||
},
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Error retrieving cached artifact: {str(e)}")
|
||||
|
||||
@@ -258,6 +306,12 @@ async def handle(request: Request, remote_name: str, path: str, storage, cache,
|
||||
if result.get("etag") or result.get("last_modified"):
|
||||
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
|
||||
|
||||
if not is_mutable:
|
||||
published = result.get("last_modified")
|
||||
if published:
|
||||
cache.store_artifact_published(remote_name, path, published)
|
||||
_check_quarantine(remote_name, published, config)
|
||||
|
||||
try:
|
||||
cache_key = storage.get_object_key(remote_name, path)
|
||||
artifact_data = storage.download_object(cache_key)
|
||||
|
||||
Vendored
+21
@@ -78,6 +78,27 @@ class RedisCache:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def get_artifact_published_key(self, remote_name: str, path: str) -> str:
    """Build the cache key under which an artifact's publish date is stored.

    The key embeds the remote name and the first 16 hex characters of the
    SHA-256 digest of ``path``.
    """
    path_digest = hashlib.sha256(path.encode()).hexdigest()
    short_digest = path_digest[:16]
    return f"pkg:published:{remote_name}:{short_digest}"
|
||||
|
||||
def store_artifact_published(self, remote_name: str, path: str, last_modified: str) -> None:
    """Record the upstream Last-Modified value for an artifact.

    Does nothing when the cache is unavailable. Any error raised while
    building the key or writing the value is ignored (best-effort write).
    """
    if self.available:
        try:
            redis_key = self.get_artifact_published_key(remote_name, path)
            self.client.set(redis_key, last_modified)
        except Exception:
            # Best-effort: a failed write must not break the request.
            pass
|
||||
|
||||
def get_artifact_published(self, remote_name: str, path: str) -> str | None:
|
||||
"""Return the stored Last-Modified string for an artifact, or None."""
|
||||
if not self.available:
|
||||
return None
|
||||
try:
|
||||
return self.client.get(self.get_artifact_published_key(remote_name, path))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
|
||||
if not self.available:
|
||||
return
|
||||
|
||||
@@ -159,3 +159,16 @@ class ConfigManager:
|
||||
return {}
|
||||
|
||||
return remote_config.get("cache", {})
|
||||
|
||||
def get_quarantine_config(self, remote_name: str) -> tuple[bool, int]:
    """Look up the quarantine settings of a remote as ``(enabled, days)``.

    ``enabled`` comes from the remote's ``quarantine_new`` entry and ``days``
    from ``quarantine_days``. A remote with no configuration yields
    ``(False, 0)``.
    """
    settings = self.get_remote_config(remote_name)
    if settings:
        is_enabled = bool(settings.get("quarantine_new", False))
        window_days = int(settings.get("quarantine_days", 0))
        return is_enabled, window_days
    return False, 0
|
||||
|
||||
Reference in New Issue
Block a user