feat: add check_mutable_updates flag for conditional upstream revalidation

When check_mutable_updates: true is set on a remote, expired user-defined
mutable files are revalidated before re-downloading:

- On expiry a conditional HEAD is sent with If-None-Match / If-Modified-Since
- 304 Not Modified: TTL is refreshed in Redis, S3 cache is untouched
- 200 / no conditional support: cache is invalidated and file re-downloaded
- Network error: safe fallback — assume changed, re-download

ETag and Last-Modified from upstream responses are stored in Redis under
mutable:meta:<remote>:<hash> (no expiry, cleaned up on re-download or
cache flush). The flag only applies to user-configured mutable_patterns;
built-in package-type defaults (APKINDEX, repomd.xml, Docker manifests)
are always re-fetched unconditionally.

cache/flush also clears mutable:meta:* keys alongside index:* keys.
This commit is contained in:
2026-04-27 01:00:00 +10:00
parent 8bc9285117
commit 8fe4bac2b9
8 changed files with 265 additions and 16 deletions
+14 -1
View File
@@ -70,6 +70,10 @@ remotes:
# Branch archives can change on every push
- ".*/archive/refs/heads/main\\.tar\\.gz$"
- ".*/archive/refs/heads/master\\.tar\\.gz$"
# Before re-downloading an expired branch archive, check whether it has
# actually changed (304 Not Modified → just refresh the TTL, no transfer).
# Only applies to user-defined mutable_patterns, not package-type defaults.
check_mutable_updates: true
cache:
immutable_ttl: 0 # Tag archives cached indefinitely
mutable_ttl: 86400 # Branch archives refreshed after 1 day
@@ -113,6 +117,9 @@ remotes:
description: "Alpine Linux APK package repository"
immutable_patterns:
- ".*/x86_64/.*\\.apk$"
# check_mutable_updates not set: APKINDEX.tar.gz is a package-type default
# and is always re-fetched on expiry — conditional checks are skipped for
# built-in mutable patterns regardless of this flag.
cache:
immutable_ttl: 0 # Files cached indefinitely
mutable_ttl: 7200 # Index files (APKINDEX.tar.gz) cached for 2 hours
@@ -127,9 +134,12 @@ remotes:
- ".*/noarch/.*\\.rpm$"
- ".*/repodata/.*$"
- ".*\\.rpm$" # Allow all RPM files
# repomd.xml / repodata are package-type defaults — always re-fetched on
# expiry. check_mutable_updates would only apply to any custom
# mutable_patterns added here.
cache:
immutable_ttl: 0 # Files cached indefinitely
mutable_ttl: 7200 # Metadata files cached for 1 hour
mutable_ttl: 7200 # Metadata files cached for 2 hours
epel:
base_url: "http://mirror.aarnet.edu.au/pub/epel"
@@ -168,6 +178,9 @@ remotes:
description: "GitHub Container Registry"
# username: "your-github-username"
# password: "your-github-pat" # needs read:packages scope
# Docker manifest/tag-list patterns are package-type defaults — always
# re-fetched on expiry. check_mutable_updates only applies to any custom
# mutable_patterns you add (e.g. a metadata endpoint).
cache:
immutable_ttl: 0
mutable_ttl: 300
+42 -7
View File
@@ -26,11 +26,13 @@ class RedisCache:
return any(re.search(p, file_path) for p in patterns)
def get_index_cache_key(self, remote_name: str, path: str) -> str:
"""Generate cache key for index files"""
return f"index:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
def is_index_valid(self, remote_name: str, path: str, ttl_override: int = None) -> bool:
"""Check if index file is still valid (not expired)"""
def get_mutable_meta_key(self, remote_name: str, path: str) -> str:
return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
def is_index_valid(self, remote_name: str, path: str) -> bool:
"""Check if mutable file is still within its TTL window."""
if not self.available:
return False
@@ -41,7 +43,7 @@ class RedisCache:
return False
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
"""Mark index file as cached with TTL"""
"""Set or refresh the TTL key for a mutable file."""
if not self.available:
return
@@ -51,13 +53,45 @@ class RedisCache:
except Exception:
pass
def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None:
"""Persist ETag and Last-Modified for future conditional requests."""
if not self.available:
return
data = {}
if etag:
data["etag"] = etag
if last_modified:
data["last_modified"] = last_modified
if not data:
return
try:
self.client.hset(self.get_mutable_meta_key(remote_name, path), mapping=data)
except Exception:
pass
def get_mutable_meta(self, remote_name: str, path: str) -> dict:
"""Return stored ETag/Last-Modified for a mutable file, or {}."""
if not self.available:
return {}
try:
return self.client.hgetall(self.get_mutable_meta_key(remote_name, path)) or {}
except Exception:
return {}
def delete_mutable_meta(self, remote_name: str, path: str) -> None:
if not self.available:
return
try:
self.client.delete(self.get_mutable_meta_key(remote_name, path))
except Exception:
pass
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
"""Remove expired index from S3 storage"""
"""Remove an expired mutable file from S3 and clear its Redis meta."""
if not self.available:
return
try:
# Construct the URL the same way as in the main flow
import os
from .config import ConfigManager
@@ -69,9 +103,10 @@ class RedisCache:
if remote_config:
base_url = remote_config.get("base_url")
if base_url:
# Use hierarchical path-based key (same as cache_single_artifact)
s3_key = storage.get_object_key(remote_name, path)
if storage.exists(s3_key):
storage.client.delete_object(Bucket=storage.bucket, Key=s3_key)
except Exception:
pass
self.delete_mutable_meta(remote_name, path)
+7
View File
@@ -124,6 +124,13 @@ class ConfigManager:
db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}"
return {"url": db_url}
def get_user_mutable_patterns(self, remote_name: str) -> list[str]:
"""Return only user-configured mutable_patterns, excluding package-type defaults."""
remote_config = self.get_remote_config(remote_name)
if not remote_config:
return []
return remote_config.get("mutable_patterns", [])
def get_mutable_patterns(self, remote_name: str) -> list[str]:
"""Return mutable-file patterns for a remote (TTL is configured per-remote in cache.index_ttl).
+59 -1
View File
@@ -87,8 +87,10 @@ def flush_cache(
if cache_type in ["all", "index"]:
if remote:
patterns.append(f"index:{remote}:*")
patterns.append(f"mutable:meta:{remote}:*")
else:
patterns.append("index:*")
patterns.append("mutable:meta:*")
if cache_type in ["all", "metrics"]:
if remote:
@@ -240,12 +242,36 @@ async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
"storage_path": storage_path,
"size": len(response.content),
"status": "cached",
"etag": response.headers.get("ETag"),
"last_modified": response.headers.get("Last-Modified"),
}
except Exception as e:
return {"url": url, "status": "error", "error": str(e)}
async def check_upstream_changed(remote_url: str, remote_name: str, path: str) -> bool:
"""Conditional HEAD against upstream. Returns False only on a definitive 304."""
meta = cache.get_mutable_meta(remote_name, path)
if not meta:
return True
headers = {}
if meta.get("etag"):
headers["If-None-Match"] = meta["etag"]
if meta.get("last_modified"):
headers["If-Modified-Since"] = meta["last_modified"]
if not headers:
return True
try:
async with httpx.AsyncClient(follow_redirects=True) as client:
response = await client.head(remote_url, headers=headers)
return response.status_code != 304
except Exception:
return True
@app.get("/api/v1/remote/{remote_name}/{path:path}")
async def get_artifact(remote_name: str, path: str):
# Check if remote is configured
@@ -302,9 +328,23 @@ async def get_artifact(remote_name: str, path: str):
if cached_key and is_mutable:
if not cache.is_index_valid(remote_name, path):
remote_cfg = config.get_remote_config(remote_name) or {}
check_updates = remote_cfg.get("check_mutable_updates", False)
user_mutable = check_updates and cache.is_mutable_file(path, config.get_user_mutable_patterns(remote_name))
if user_mutable:
changed = await check_upstream_changed(remote_url, remote_name, path)
if not changed:
mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
cache.mark_index_cached(remote_name, path, mutable_ttl)
logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)")
else:
logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")
cache.cleanup_expired_index(storage, remote_name, path)
cached_key = None
else:
logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")
cache.cleanup_expired_index(storage, remote_name, path)
cached_key = None # Force re-download
cached_key = None
if cached_key:
# Return cached artifact
@@ -362,6 +402,8 @@ async def get_artifact(remote_name: str, path: str):
mutable_ttl = cache_config.get("mutable_ttl", 3600)
cache.mark_index_cached(remote_name, path, mutable_ttl)
logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
if result.get("etag") or result.get("last_modified"):
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
# Now return the cached artifact
try:
@@ -439,6 +481,20 @@ async def docker_v2_proxy(request: Request, remote_name: str, path: str):
if cached_key and is_mutable:
if not cache.is_index_valid(remote_name, path):
remote_cfg = config.get_remote_config(remote_name) or {}
check_updates = remote_cfg.get("check_mutable_updates", False)
user_mutable = check_updates and cache.is_mutable_file(path, config.get_user_mutable_patterns(remote_name))
if user_mutable:
changed = await check_upstream_changed(remote_url, remote_name, path)
if not changed:
mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600)
cache.mark_index_cached(remote_name, path, mutable_ttl)
logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)")
else:
logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading")
cache.cleanup_expired_index(storage, remote_name, path)
cached_key = None
else:
logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache")
cache.cleanup_expired_index(storage, remote_name, path)
cached_key = None
@@ -453,6 +509,8 @@ async def docker_v2_proxy(request: Request, remote_name: str, path: str):
mutable_ttl = cache_config.get("mutable_ttl", 3600)
cache.mark_index_cached(remote_name, path, mutable_ttl)
logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)")
if result.get("etag") or result.get("last_modified"):
cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified"))
artifact_data = storage.download_object(storage.get_object_key(remote_name, path))
+8
View File
@@ -59,6 +59,14 @@ TEST_REMOTES = {
"mutable_patterns": ["metadata\\.json$"],
"cache": {"immutable_ttl": 0, "mutable_ttl": 600},
},
"check-mutable-test": {
"base_url": "https://example.com",
"type": "remote",
"package": "generic",
"mutable_patterns": ["metadata\\.json$"],
"check_mutable_updates": True,
"cache": {"immutable_ttl": 0, "mutable_ttl": 600},
},
"local-test": {
"type": "local",
"package": "generic",
+48
View File
@@ -235,3 +235,51 @@ class TestIndexValidity:
# client is None when Redis is unavailable — setex cannot be called
assert unavailable_cache.client is None
unavailable_cache.mark_index_cached("remote", "some/path", 300) # must not raise
# ---------------------------------------------------------------------------
# mutable meta (ETag / Last-Modified storage)
# ---------------------------------------------------------------------------
class TestMutableMeta:
def test_meta_key_format(self, bare_cache):
path = "repo/metadata.json"
expected_hash = hashlib.sha256(path.encode()).hexdigest()[:16]
assert bare_cache.get_mutable_meta_key("myremote", path) == f"mutable:meta:myremote:{expected_hash}"
def test_meta_key_hash_is_16_chars(self, bare_cache):
key = bare_cache.get_mutable_meta_key("remote", "some/path/file.json")
assert len(key.split(":")[-1]) == 16
def test_store_and_retrieve_etag(self, cache_with_redis, mock_redis_client):
mock_redis_client.hgetall.return_value = {"etag": '"abc123"'}
cache_with_redis.store_mutable_meta("remote", "path/meta.json", '"abc123"', None)
mock_redis_client.hset.assert_called_once()
meta = cache_with_redis.get_mutable_meta("remote", "path/meta.json")
assert meta["etag"] == '"abc123"'
def test_store_and_retrieve_last_modified(self, cache_with_redis, mock_redis_client):
lm = "Mon, 01 Jan 2024 00:00:00 GMT"
mock_redis_client.hgetall.return_value = {"last_modified": lm}
cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, lm)
meta = cache_with_redis.get_mutable_meta("remote", "path/meta.json")
assert meta["last_modified"] == lm
def test_store_no_op_when_both_none(self, cache_with_redis, mock_redis_client):
cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, None)
mock_redis_client.hset.assert_not_called()
def test_store_no_op_when_unavailable(self, unavailable_cache):
unavailable_cache.store_mutable_meta("remote", "path", "etag", None) # must not raise
def test_get_returns_empty_when_unavailable(self, unavailable_cache):
assert unavailable_cache.get_mutable_meta("remote", "path") == {}
def test_delete_removes_meta_key(self, cache_with_redis, mock_redis_client):
expected_key = cache_with_redis.get_mutable_meta_key("remote", "path/meta.json")
cache_with_redis.delete_mutable_meta("remote", "path/meta.json")
mock_redis_client.delete.assert_called_once_with(expected_key)
def test_delete_no_op_when_unavailable(self, unavailable_cache):
unavailable_cache.delete_mutable_meta("remote", "path") # must not raise
+33
View File
@@ -208,6 +208,39 @@ class TestGetImmutablePatterns:
assert cfg.get_immutable_patterns("r", "/unknown/path") == [r".*\.tar\.gz$"]
# ---------------------------------------------------------------------------
# get_user_mutable_patterns
# ---------------------------------------------------------------------------
class TestGetUserMutablePatterns:
def test_returns_only_user_patterns(self, make_config):
cfg = make_config(
{
"r": {
"type": "remote",
"package": "alpine",
"base_url": "https://x.com",
"mutable_patterns": [r"custom\.json$"],
}
}
)
assert cfg.get_user_mutable_patterns("r") == [r"custom\.json$"]
def test_excludes_package_defaults(self, make_config):
# Package defaults (APKINDEX etc.) must NOT appear here
cfg = make_config({"r": {"type": "remote", "package": "alpine", "base_url": "https://x.com"}})
assert cfg.get_user_mutable_patterns("r") == []
def test_returns_empty_for_missing_remote(self, make_config):
cfg = make_config({})
assert cfg.get_user_mutable_patterns("nonexistent") == []
def test_returns_empty_when_key_absent(self, make_config):
cfg = make_config({"r": {"type": "remote", "package": "generic", "base_url": "https://x.com"}})
assert cfg.get_user_mutable_patterns("r") == []
# ---------------------------------------------------------------------------
# get_cache_config
# ---------------------------------------------------------------------------
+49 -2
View File
@@ -419,6 +419,53 @@ class TestGenericArtifactRoute:
response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz")
assert response.status_code == 200
def test_mutable_unchanged_refreshes_ttl_without_redownload(self, client, patched_deps):
"""When check_mutable_updates=True and upstream says 304, TTL is refreshed in place."""
deps = patched_deps
deps["storage"].exists.return_value = True
deps["storage"].download_object.return_value = b"metadata content"
# File is mutable and its TTL has expired
deps["cache"].is_mutable_file.return_value = True
deps["cache"].is_index_valid.return_value = False
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=False):
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
assert response.status_code == 200
deps["cache"].mark_index_cached.assert_called()
# S3 object must NOT have been deleted (no re-download)
deps["storage"].client.delete_object.assert_not_called()
def test_mutable_changed_triggers_redownload(self, client, patched_deps):
"""When check_mutable_updates=True and upstream says 200, cache is invalidated."""
deps = patched_deps
deps["storage"].exists.return_value = False
deps["cache"].is_mutable_file.return_value = True
deps["cache"].is_index_valid.return_value = False
deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'}
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock, return_value=True):
with patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
mock_cache.return_value = {"status": "error", "error": "upstream gone"}
response = client.get("/api/v1/remote/check-mutable-test/metadata.json")
assert response.status_code == 502
def test_mutable_flag_off_skips_conditional_check(self, client, patched_deps):
"""When check_mutable_updates is not set, expired mutable files are always re-fetched."""
deps = patched_deps
deps["storage"].exists.return_value = False
deps["cache"].is_mutable_file.return_value = True
deps["cache"].is_index_valid.return_value = False
with patch("artifactapi.main.check_upstream_changed", new_callable=AsyncMock) as mock_check:
with patch("artifactapi.main.cache_single_artifact", new_callable=AsyncMock) as mock_cache:
mock_cache.return_value = {"status": "error", "error": "upstream gone"}
client.get("/api/v1/remote/custom-index-test/metadata.json")
mock_check.assert_not_called()
def test_local_repo_file_not_found_returns_404(self, client, patched_deps):
deps = patched_deps
deps["database"].get_local_file_metadata.return_value = None
@@ -519,8 +566,8 @@ class TestCacheFlushEndpoint:
deps["cache"].available = True
redis_mock = MagicMock()
deps["cache"].client = redis_mock
# First pattern (index:*) returns keys; subsequent pattern returns nothing
redis_mock.keys.side_effect = [["index:test:abc", "index:test:def"], []]
# index:* returns keys; mutable:meta:* and metrics:* return nothing
redis_mock.keys.side_effect = [["index:test:abc", "index:test:def"], [], []]
deps["storage"].client.list_objects_v2.return_value = {}
response = client.put("/cache/flush")