feat: quarantine new releases to prevent supply chain attacks
ci/woodpecker/pr/pre-commit Pipeline was successful
ci/woodpecker/pr/test Pipeline was successful
ci/woodpecker/pr/build Pipeline was successful

Add per-remote quarantine support: when quarantine_new=true and quarantine_days=N,
immutable artifacts published within the last N days are blocked with 404 until
the quarantine window expires.

- ConfigManager.get_quarantine_config() reads quarantine_new/quarantine_days
- RedisCache.store/get_artifact_published() persist Last-Modified per artifact
- proxy._check_quarantine() enforces the window; fails open when date is unknown
- proxy._fetch_last_modified() HEAD-requests upstream to discover publish date
- Docker proxy route wires quarantine checks on both cache-hit and cache-miss
- remotes.yaml: quarantine_new/quarantine_days added to pypi example (3-day window)
- README: documents quarantine configuration
This commit is contained in:
2026-04-28 23:01:52 +10:00
parent 373366e695
commit 3bd3ca8b74
10 changed files with 414 additions and 0 deletions
+22
View File
@@ -352,3 +352,25 @@ Set `check_mutable_updates: true` to send `HEAD` with `If-None-Match` / `If-Modi
### Stale-on-upstream-error
When a mutable file expires and the upstream is unreachable (connection refused, DNS failure, timeout), the cached copy is kept and its TTL refreshed. HTTP error responses (4xx, 5xx) are not treated as network failures and proceed with normal expiry.
### Quarantine (supply-chain protection)
Set `quarantine_new: true` and `quarantine_days: N` on a remote to block immutable artifacts published within the last N days. Requests return `404` until the quarantine period expires, giving time to detect malicious packages before they are consumed.
```yaml
remotes:
pypi:
base_url: "https://files.pythonhosted.org"
type: "remote"
package: "pypi"
quarantine_new: true
quarantine_days: 3 # block packages published in the last 3 days
immutable_patterns:
- "packages/.*\\.whl$"
- "packages/.*\\.tar\\.gz$"
cache:
immutable_ttl: 0
mutable_ttl: 600
```
The upstream `Last-Modified` response header is used as a stand-in for the artifact's publish date. Artifacts that have no `Last-Modified` header are allowed through (fail-open). Mutable files (index pages, tag manifests) are never quarantined.
+12
View File
@@ -9,6 +9,13 @@
# immutable_ttl: TTL for immutable files (0 = forever, rarely needed to change).
# mutable_ttl: TTL in seconds for mutable files. Omit to use the default (3600).
#
# quarantine_new: Set to true to block immutable artifacts published within the last
# quarantine_days days. Requests return 404 until the quarantine period
# expires. Fails open when the publish date cannot be determined.
# quarantine_days: Number of days to quarantine newly published artifacts (requires
# quarantine_new: true). The upstream Last-Modified header is used as
# the publish date.
#
# WARNING: this file may contain credentials — do not commit real values.
#
# Global configuration
@@ -202,6 +209,11 @@ remotes:
# simple/ requests are transparently fetched from pypi.org; package files come from
# files.pythonhosted.org (base_url). URLs in the simple index are rewritten to this remote.
check_mutable_updates: true
# Block packages published within the last 3 days (supply-chain attack mitigation).
# Immutable artifacts (wheel/sdist) newer than quarantine_days return 404 until
# the window passes. Disable by setting quarantine_new: false or removing both keys.
quarantine_new: true
quarantine_days: 3
immutable_patterns:
- "packages/.*\\.whl$"
- "packages/.*\\.whl\\.metadata$"
+12
View File
@@ -59,6 +59,18 @@ async def proxy(request: Request, remote_name: str, path: str, storage, cache, c
logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
if result.get("etag") or result.get("last_modified"):
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
if not is_mutable:
published = result.get("last_modified")
if published:
cache.store_artifact_published(remote_name, path, published)
_proxy._check_quarantine(remote_name, published, config)
elif not is_mutable:
published = cache.get_artifact_published(remote_name, path)
if not published:
published = await _proxy._fetch_last_modified(remote_url, remote_config)
if published:
cache.store_artifact_published(remote_name, path, published)
_proxy._check_quarantine(remote_name, published, config)
artifact_data = storage.download_object(storage.get_object_key(remote_name, path))
+54
View File
@@ -2,6 +2,8 @@ import base64
import logging
import os
import re
from datetime import UTC, datetime, timedelta
from email.utils import parsedate_to_datetime
import httpx
from fastapi import HTTPException, Request, Response
@@ -19,6 +21,42 @@ class UpstreamUnreachable(Exception):
"""Raised when the upstream backend cannot be contacted (network or timeout error)."""
def _check_quarantine(remote_name: str, last_modified_str: str | None, config) -> None:
"""Raise HTTP 404 if the artifact is within the per-remote quarantine window.
Fails open (allows the request) when the publish date cannot be determined.
"""
enabled, days = config.get_quarantine_config(remote_name)
if not enabled or not days:
return
if not last_modified_str:
return # cannot determine age → allow
try:
publish_date = parsedate_to_datetime(last_modified_str)
except Exception:
return # unparseable → allow
cutoff = datetime.now(UTC) - timedelta(days=days)
if publish_date > cutoff:
available_on = (publish_date + timedelta(days=days)).date()
raise HTTPException(
status_code=404,
detail=(
f"Package quarantined: published {publish_date.date()}, available after {available_on} ({days}-day new-release quarantine)"
),
)
async def _fetch_last_modified(remote_url: str, remote_cfg: dict) -> str | None:
    """HEAD the upstream URL and return its Last-Modified header, or None on any failure.

    A failure is either an exception while performing the request or a final
    response (after following redirects) whose status is not 2xx. Headers on
    error responses describe the error page rather than the artifact, so they
    are not used as a publish date.

    Args:
        remote_url: Full upstream URL of the artifact.
        remote_cfg: Remote configuration passed to ``_basic_auth_header`` to
            build the request headers.

    Returns:
        The raw ``Last-Modified`` header value, or None when unavailable.
    """
    auth = _basic_auth_header(remote_cfg)
    try:
        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.head(remote_url, headers=auth, timeout=10.0)
    except Exception:
        # Best-effort lookup: callers treat None as "publish date unknown".
        return None
    if not 200 <= response.status_code < 300:
        return None
    return response.headers.get("Last-Modified")
def _basic_auth_header(remote_cfg: dict) -> dict[str, str]:
username = remote_cfg.get("username")
password = remote_cfg.get("password")
@@ -225,6 +263,14 @@ async def handle(request: Request, remote_name: str, path: str, storage, cache,
cached_key = None
if cached_key:
if not is_mutable:
published = cache.get_artifact_published(remote_name, path)
if not published:
published = await _fetch_last_modified(remote_url, remote_config)
if published:
cache.store_artifact_published(remote_name, path, published)
_check_quarantine(remote_name, published, config)
try:
artifact_data = storage.download_object(cached_key)
artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name)
@@ -240,6 +286,8 @@ async def handle(request: Request, remote_name: str, path: str, storage, cache,
"X-Artifact-Size": str(len(artifact_data)),
},
)
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error retrieving cached artifact: {str(e)}")
@@ -258,6 +306,12 @@ async def handle(request: Request, remote_name: str, path: str, storage, cache,
if result.get("etag") or result.get("last_modified"):
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
if not is_mutable:
published = result.get("last_modified")
if published:
cache.store_artifact_published(remote_name, path, published)
_check_quarantine(remote_name, published, config)
try:
cache_key = storage.get_object_key(remote_name, path)
artifact_data = storage.download_object(cache_key)
+21
View File
@@ -78,6 +78,27 @@ class RedisCache:
except Exception:
pass
def get_artifact_published_key(self, remote_name: str, path: str) -> str:
    """Build the cache key under which an artifact's publish date is stored.

    The key is ``pkg:published:<remote_name>:<hash>``, where ``<hash>`` is the
    first 16 hex characters of the SHA-256 digest of ``path``.
    """
    path_digest = hashlib.sha256(path.encode()).hexdigest()
    short_digest = path_digest[:16]
    return f"pkg:published:{remote_name}:{short_digest}"
def store_artifact_published(self, remote_name: str, path: str, last_modified: str) -> None:
    """Save an artifact's upstream Last-Modified value in the cache.

    Does nothing when the cache is unavailable. Any error raised while
    writing is swallowed, so the call is best-effort and never raises.
    """
    if self.available:
        try:
            published_key = self.get_artifact_published_key(remote_name, path)
            self.client.set(published_key, last_modified)
        except Exception:
            # Best-effort write: a cache failure must not break the request.
            pass
def get_artifact_published(self, remote_name: str, path: str) -> str | None:
"""Return the stored Last-Modified string for an artifact, or None."""
if not self.available:
return None
try:
return self.client.get(self.get_artifact_published_key(remote_name, path))
except Exception:
return None
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
if not self.available:
return
+13
View File
@@ -159,3 +159,16 @@ class ConfigManager:
return {}
return remote_config.get("cache", {})
def get_quarantine_config(self, remote_name: str) -> tuple[bool, int]:
    """Return (enabled, quarantine_days) for a remote.

    When enabled=True and quarantine_days>0, immutable artifacts published
    within the last quarantine_days days are blocked with a 404.

    ``quarantine_days`` is coerced defensively: a missing, null, or
    non-numeric value yields 0 and a negative value is clamped to 0, so a
    malformed setting never raises from this accessor.

    Args:
        remote_name: Name of the remote to look up.

    Returns:
        ``(False, 0)`` when the remote has no configuration; otherwise the
        boolean value of ``quarantine_new`` and the non-negative day count.
    """
    remote_config = self.get_remote_config(remote_name)
    if not remote_config:
        return False, 0
    enabled = bool(remote_config.get("quarantine_new", False))
    raw_days = remote_config.get("quarantine_days", 0)
    try:
        days = int(raw_days)
    except (TypeError, ValueError):
        # e.g. an empty "quarantine_days:" key (None) or a non-numeric string
        days = 0
    return enabled, max(days, 0)
+18
View File
@@ -98,6 +98,24 @@ TEST_REMOTES = {
"immutable_patterns": [r"\.tgz$"],
"cache": {"immutable_ttl": 0, "mutable_ttl": 3600},
},
"quarantine-test": {
"base_url": "https://releases.example.com",
"type": "remote",
"package": "generic",
"immutable_patterns": [r".*\.tar\.gz$"],
"quarantine_new": True,
"quarantine_days": 3,
"cache": {"immutable_ttl": 0, "mutable_ttl": 0},
},
"quarantine-disabled": {
"base_url": "https://releases.example.com",
"type": "remote",
"package": "generic",
"immutable_patterns": [r".*\.tar\.gz$"],
"quarantine_new": False,
"quarantine_days": 3,
"cache": {"immutable_ttl": 0, "mutable_ttl": 0},
},
}
}
+44
View File
@@ -283,3 +283,47 @@ class TestMutableMeta:
def test_delete_no_op_when_unavailable(self, unavailable_cache):
unavailable_cache.delete_mutable_meta("remote", "path") # must not raise
# ---------------------------------------------------------------------------
# artifact published date (quarantine support)
# ---------------------------------------------------------------------------
class TestArtifactPublished:
    """Tests for the RedisCache helpers that persist an artifact's publish date."""

    def test_key_format_is_deterministic(self, bare_cache):
        artifact_path = "some/path/package-1.0.tar.gz"
        digest_prefix = hashlib.sha256(artifact_path.encode()).hexdigest()[:16]
        actual_key = bare_cache.get_artifact_published_key("myremote", artifact_path)
        assert actual_key == f"pkg:published:myremote:{digest_prefix}"

    def test_key_hash_is_16_chars(self, bare_cache):
        *_, hash_part = bare_cache.get_artifact_published_key("remote", "path/to/file.whl").split(":")
        assert len(hash_part) == 16

    def test_different_paths_produce_different_keys(self, bare_cache):
        first_key = bare_cache.get_artifact_published_key("remote", "pkg-1.0.tar.gz")
        second_key = bare_cache.get_artifact_published_key("remote", "pkg-2.0.tar.gz")
        assert first_key != second_key

    def test_store_calls_set_with_correct_value(self, cache_with_redis, mock_redis_client):
        header_value = "Mon, 01 Jan 2024 00:00:00 GMT"
        target = ("remote", "path/pkg.tar.gz")
        cache_with_redis.store_artifact_published(*target, header_value)
        key_for_target = cache_with_redis.get_artifact_published_key(*target)
        mock_redis_client.set.assert_called_once_with(key_for_target, header_value)

    def test_get_returns_stored_value(self, cache_with_redis, mock_redis_client):
        header_value = "Tue, 15 Mar 2022 12:00:00 GMT"
        mock_redis_client.get.return_value = header_value
        assert cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz") == header_value

    def test_get_returns_none_when_not_stored(self, cache_with_redis, mock_redis_client):
        mock_redis_client.get.return_value = None
        assert cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz") is None

    def test_store_no_op_when_unavailable(self, unavailable_cache):
        # Passing means the call returned without raising.
        unavailable_cache.store_artifact_published("remote", "path", "Mon, 01 Jan 2024 00:00:00 GMT")

    def test_get_returns_none_when_unavailable(self, unavailable_cache):
        fetched = unavailable_cache.get_artifact_published("remote", "path")
        assert fetched is None
+67
View File
@@ -351,3 +351,70 @@ class TestConfigReload:
cfg._check_reload()
assert "repo-a" in cfg.config["remotes"]
# ---------------------------------------------------------------------------
# get_quarantine_config
# ---------------------------------------------------------------------------
class TestGetQuarantineConfig:
    """Tests for ConfigManager.get_quarantine_config and its (enabled, days) result."""

    @staticmethod
    def _remote(**quarantine_keys):
        """Return a minimal generic remote definition plus the given extra keys."""
        definition = {"type": "remote", "package": "generic", "base_url": "https://x.com"}
        definition.update(quarantine_keys)
        return definition

    def test_returns_false_zero_when_not_configured(self, make_config):
        manager = make_config({"r": self._remote()})
        is_enabled, window = manager.get_quarantine_config("r")
        assert is_enabled is False
        assert window == 0

    def test_returns_false_zero_for_missing_remote(self, make_config):
        manager = make_config({})
        is_enabled, window = manager.get_quarantine_config("nonexistent")
        assert is_enabled is False
        assert window == 0

    def test_enabled_true_and_days_returned(self, make_config):
        manager = make_config({"r": self._remote(quarantine_new=True, quarantine_days=7)})
        is_enabled, window = manager.get_quarantine_config("r")
        assert is_enabled is True
        assert window == 7

    def test_quarantine_new_false_returns_disabled(self, make_config):
        manager = make_config({"r": self._remote(quarantine_new=False, quarantine_days=7)})
        is_enabled, window = manager.get_quarantine_config("r")
        assert is_enabled is False
        assert window == 7

    def test_enabled_with_zero_days_returns_zero(self, make_config):
        manager = make_config({"r": self._remote(quarantine_new=True, quarantine_days=0)})
        is_enabled, window = manager.get_quarantine_config("r")
        assert is_enabled is True
        assert window == 0
+151
View File
@@ -2,6 +2,7 @@
import hashlib
import json
from datetime import UTC
from unittest.mock import ANY, AsyncMock, MagicMock, patch
import pytest
@@ -924,3 +925,153 @@ class TestHelmRemote:
response = client.get("/api/v1/remote/helm-test/vault.zip")
assert response.status_code == 403
# ---------------------------------------------------------------------------
# Quarantine (quarantine-test remote: quarantine_new=True, quarantine_days=3)
# ---------------------------------------------------------------------------
class TestQuarantine:
    """Route-level quarantine tests against the quarantine-test / quarantine-disabled remotes."""

    QUARANTINED_URL = "/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz"
    UNRESTRICTED_URL = "/api/v1/remote/quarantine-disabled/some/path/package-1.0.tar.gz"

    @staticmethod
    def _http_date(days_ago):
        """Format the instant ``days_ago`` days before now as an HTTP date (GMT)."""
        from datetime import datetime, timedelta
        from email.utils import format_datetime

        moment = datetime.now(UTC) - timedelta(days=days_ago)
        return format_datetime(moment, usegmt=True)

    def _recent_date(self, days_ago=1):
        """Return an HTTP-format date string N days in the past (within quarantine window)."""
        return self._http_date(days_ago)

    def _old_date(self, days_ago=10):
        """Return an HTTP-format date string N days in the past (outside quarantine window)."""
        return self._http_date(days_ago)

    @staticmethod
    def _prime(deps, *, in_storage):
        """Configure the storage/cache mocks for an immutable artifact request."""
        deps["storage"].exists.return_value = in_storage
        deps["storage"].download_object.return_value = b"content"
        deps["cache"].is_mutable_file.return_value = False

    @staticmethod
    def _upstream_returns(last_modified):
        """Patch cache_single_artifact so a cache miss reports the given Last-Modified."""
        return patch(
            "artifactapi.artifact.proxy.cache_single_artifact",
            new_callable=AsyncMock,
            return_value={"status": "cached", "last_modified": last_modified},
        )

    def test_cache_miss_recent_artifact_quarantined(self, client, patched_deps):
        """Cache miss: artifact published within quarantine window → 404."""
        self._prime(patched_deps, in_storage=False)
        with self._upstream_returns(self._recent_date()):
            response = client.get(self.QUARANTINED_URL)
        assert response.status_code == 404
        assert "quarantined" in response.json()["detail"].lower()

    def test_cache_miss_old_artifact_allowed(self, client, patched_deps):
        """Cache miss: artifact published outside quarantine window → 200."""
        self._prime(patched_deps, in_storage=False)
        with self._upstream_returns(self._old_date()):
            response = client.get(self.QUARANTINED_URL)
        assert response.status_code == 200

    def test_cache_miss_no_last_modified_fails_open(self, client, patched_deps):
        """Cache miss: no Last-Modified header → fail open (200, not quarantined)."""
        self._prime(patched_deps, in_storage=False)
        with self._upstream_returns(None):
            response = client.get(self.QUARANTINED_URL)
        assert response.status_code == 200

    def test_cache_hit_recent_artifact_quarantined(self, client, patched_deps):
        """Cache hit: stored publish date within quarantine window → 404."""
        self._prime(patched_deps, in_storage=True)
        patched_deps["cache"].get_artifact_published.return_value = self._recent_date()
        response = client.get(self.QUARANTINED_URL)
        assert response.status_code == 404
        assert "quarantined" in response.json()["detail"].lower()

    def test_cache_hit_old_artifact_allowed(self, client, patched_deps):
        """Cache hit: stored publish date outside quarantine window → 200."""
        self._prime(patched_deps, in_storage=True)
        patched_deps["cache"].get_artifact_published.return_value = self._old_date()
        response = client.get(self.QUARANTINED_URL)
        assert response.status_code == 200

    def test_cache_hit_no_stored_date_fetches_upstream(self, client, patched_deps):
        """Cache hit: no stored date → HEAD upstream to get Last-Modified."""
        self._prime(patched_deps, in_storage=True)
        patched_deps["cache"].get_artifact_published.return_value = None
        with patch(
            "artifactapi.artifact.proxy._fetch_last_modified",
            new_callable=AsyncMock,
            return_value=self._old_date(),
        ) as head_lookup:
            response = client.get(self.QUARANTINED_URL)
        head_lookup.assert_called_once()
        assert response.status_code == 200

    def test_quarantine_disabled_allows_recent_artifact(self, client, patched_deps):
        """quarantine_new=False: recent artifacts are not blocked."""
        self._prime(patched_deps, in_storage=False)
        with self._upstream_returns(self._recent_date()):
            response = client.get(self.UNRESTRICTED_URL)
        assert response.status_code == 200

    def test_quarantine_detail_includes_available_date(self, client, patched_deps):
        """The 404 detail should include the date when the artifact becomes available."""
        self._prime(patched_deps, in_storage=False)
        with self._upstream_returns(self._recent_date()):
            response = client.get(self.QUARANTINED_URL)
        assert response.status_code == 404
        message = response.json()["detail"]
        assert "available after" in message
        assert "3-day" in message