feat: make index file patterns configurable per remote

Replace hardcoded is_index_file logic with regex patterns driven by
remotes.yaml. Package-level defaults (alpine/rpm/docker) are merged with
any extra patterns listed under index_patterns in the remote config.
This commit is contained in:
2026-04-25 18:40:45 +10:00
parent b3d12f4962
commit 2414ddfdd3
3 changed files with 43 additions and 24 deletions
+6 -21
View File
@@ -1,3 +1,4 @@
import re
import time
import hashlib
import redis
@@ -17,27 +18,11 @@ class RedisCache:
self.client = None
self.available = False
def is_index_file(self, file_path: str) -> bool:
    """Check if the file is an index file that should have TTL.

    A path counts as an index file when any of the following holds:

    * it ends with ``APKINDEX.tar.gz``, ``Packages.gz`` or ``repomd.xml``;
    * it contains ``repodata/`` and ends with one of the listed metadata
      suffixes;
    * it contains ``/manifests/`` and the text after the first
      ``/manifests/`` does not start with ``sha256:``;
    * it contains ``/tags/list``.

    Args:
        file_path: Path of the requested artifact.

    Returns:
        True if the path matches one of the rules above, otherwise False.
    """
    # Index files recognised by their file name alone.
    if file_path.endswith(("APKINDEX.tar.gz", "Packages.gz", "repomd.xml")):
        return True

    # Metadata files located under a path containing "repodata/".
    if "repodata/" in file_path and file_path.endswith((
        ".xml", ".xml.gz", ".xml.bz2", ".xml.xz", ".xml.zck", ".xml.zst",
        ".sqlite", ".sqlite.gz", ".sqlite.bz2", ".sqlite.xz", ".sqlite.zck", ".sqlite.zst",
        ".yaml.xz", ".yaml.gz", ".yaml.bz2", ".yaml.zst",
        ".asc", ".txt",
    )):
        return True

    # Docker tag-based manifests are mutable (index); digest-pinned are immutable (file)
    _, separator, reference = file_path.partition("/manifests/")
    if separator and not reference.startswith("sha256:"):
        return True

    # A substring test already covers paths that end with "/tags/list", so no
    # separate endswith() check is needed.
    return "/tags/list" in file_path
def is_index_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
"""Return True if file_path matches any of the index patterns."""
if patterns is None:
patterns = []
return any(re.search(p, file_path) for p in patterns)
def get_index_cache_key(self, remote_name: str, path: str) -> str:
"""Generate cache key for index files"""
+33
View File
@@ -4,6 +4,25 @@ import yaml
from typing import Optional
_PACKAGE_INDEX_PATTERNS: dict[str, list[str]] = {
"alpine": [
r"APKINDEX\.tar\.gz$",
],
"rpm": [
r"repomd\.xml$",
r"repodata/.*\.(xml|xml\.gz|xml\.bz2|xml\.xz|xml\.zck|xml\.zst"
r"|sqlite|sqlite\.gz|sqlite\.bz2|sqlite\.xz|sqlite\.zck|sqlite\.zst"
r"|yaml\.xz|yaml\.gz|yaml\.bz2|yaml\.zst|asc|txt)$",
r"Packages\.gz$",
],
"docker": [
r"/manifests/(?!sha256:)[^/]+$",
r"/tags/list$",
],
"generic": [],
}
class ConfigManager:
def __init__(self, config_file: str = "remotes.yaml"):
self.config_file = config_file
@@ -111,6 +130,20 @@ class ConfigManager:
db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}"
return {"url": db_url}
def get_index_patterns(self, remote_name: str) -> list[str]:
    """Return index-file patterns for a remote.

    Merges the package-level defaults with any extra patterns listed under
    ``index_patterns`` in the remote's config.

    Args:
        remote_name: Name of the remote to look up via ``get_remote_config``.

    Returns:
        A new list holding the defaults for the remote's ``package`` value
        (``"generic"`` when the key is absent) followed by the extra
        patterns, without duplicates. An empty list is returned when the
        remote has no config.
    """
    remote_config = self.get_remote_config(remote_name)
    if not remote_config:
        return []

    package = remote_config.get("package", "generic")
    defaults = _PACKAGE_INDEX_PATTERNS.get(package, [])

    # The key may be present but empty in the config, in which case .get()
    # yields None rather than the fallback; treat that as "no extras".
    extra = remote_config.get("index_patterns") or []
    if isinstance(extra, str):
        # A single pattern given as a bare string would otherwise be
        # iterated character by character.
        extra = [extra]

    # Copy so the shared module-level defaults are never mutated.
    merged = list(defaults)
    for pattern in extra:
        if pattern not in merged:
            merged.append(pattern)
    return merged
def get_cache_config(self, remote_name: str) -> dict:
"""Get cache configuration for a specific remote"""
remote_config = self.get_remote_config(remote_name)
+4 -3
View File
@@ -179,7 +179,8 @@ async def check_artifact_patterns(
remote_name: str, repo_path: str, file_path: str, full_path: str
) -> bool:
# First check if this is an index file - always allow index files
if cache.is_index_file(file_path) or cache.is_index_file(full_path):
index_patterns = config.get_index_patterns(remote_name)
if cache.is_index_file(file_path, index_patterns) or cache.is_index_file(full_path, index_patterns):
return True
# Then check basic include patterns
@@ -319,7 +320,7 @@ async def get_artifact(remote_name: str, path: str):
# For index files, check Redis TTL validity
filename = os.path.basename(path)
is_index = cache.is_index_file(path) # Check full path, not just filename
is_index = cache.is_index_file(path, config.get_index_patterns(remote_name))
if cached_key and is_index:
# Index file exists, but check if it's still valid
@@ -467,7 +468,7 @@ async def docker_v2_proxy(request: Request, remote_name: str, path: str):
if not storage.exists(cached_key):
cached_key = None
is_index = cache.is_index_file(path)
is_index = cache.is_index_file(path, config.get_index_patterns(remote_name))
if cached_key and is_index:
if not cache.is_index_valid(remote_name, path):