feat: make index file patterns configurable per remote
Replace hardcoded is_index_file logic with regex patterns driven by remotes.yaml. Package-level defaults (alpine/rpm/docker) are merged with any extra patterns listed under index_patterns in the remote config.
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import re
|
||||
import time
|
||||
import hashlib
|
||||
import redis
|
||||
@@ -17,27 +18,11 @@ class RedisCache:
|
||||
self.client = None
|
||||
self.available = False
|
||||
|
||||
def is_index_file(self, file_path: str) -> bool:
    """Report whether ``file_path`` names an index file that should have a TTL.

    Recognised as index files:
      * paths ending in ``APKINDEX.tar.gz``, ``Packages.gz`` or ``repomd.xml``;
      * paths containing ``repodata/`` that end in one of the metadata
        suffixes listed below;
      * Docker manifest paths whose reference does not start with ``sha256:``;
      * any path containing ``/tags/list``.
    """
    # Top-level repository indexes, recognised purely by their file name.
    if file_path.endswith(("APKINDEX.tar.gz", "Packages.gz", "repomd.xml")):
        return True

    # Metadata files that live under a repodata/ directory.
    repodata_suffixes = (
        ".xml", ".xml.gz", ".xml.bz2", ".xml.xz", ".xml.zck", ".xml.zst",
        ".sqlite", ".sqlite.gz", ".sqlite.bz2", ".sqlite.xz", ".sqlite.zck", ".sqlite.zst",
        ".yaml.xz", ".yaml.gz", ".yaml.bz2", ".yaml.zst",
        ".asc", ".txt",
    )
    if "repodata/" in file_path and file_path.endswith(repodata_suffixes):
        return True

    # Docker tag-based manifests are mutable (index); digest-pinned are immutable (file)
    if "/manifests/" in file_path:
        reference = file_path.split("/manifests/", 1)[1]
        if not reference.startswith("sha256:"):
            return True

    # A path ending in "/tags/list" necessarily contains it, so one test suffices.
    return "/tags/list" in file_path
|
||||
def is_index_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
|
||||
"""Return True if file_path matches any of the index patterns."""
|
||||
if patterns is None:
|
||||
patterns = []
|
||||
return any(re.search(p, file_path) for p in patterns)
|
||||
|
||||
def get_index_cache_key(self, remote_name: str, path: str) -> str:
|
||||
"""Generate cache key for index files"""
|
||||
|
||||
@@ -4,6 +4,25 @@ import yaml
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Default index-file regex patterns, keyed by a remote's ``package`` type.
# Every pattern ends in ``$`` and has no leading anchor, so it describes the
# tail of a request path.
_PACKAGE_INDEX_PATTERNS: dict[str, list[str]] = {
    "alpine": [
        r"APKINDEX\.tar\.gz$",
    ],
    "rpm": [
        r"repomd\.xml$",
        # Any file below a repodata/ directory with one of these metadata
        # extensions. The three adjacent literals concatenate into one regex.
        r"repodata/.*\.(xml|xml\.gz|xml\.bz2|xml\.xz|xml\.zck|xml\.zst"
        r"|sqlite|sqlite\.gz|sqlite\.bz2|sqlite\.xz|sqlite\.zck|sqlite\.zst"
        r"|yaml\.xz|yaml\.gz|yaml\.bz2|yaml\.zst|asc|txt)$",
        # NOTE(review): Packages.gz is conventionally a Debian/APT index name;
        # confirm that grouping it under "rpm" is intended.
        r"Packages\.gz$",
    ],
    "docker": [
        # Manifest references that do not start with "sha256:" (the negative
        # lookahead excludes digest-pinned references).
        r"/manifests/(?!sha256:)[^/]+$",
        r"/tags/list$",
    ],
    # No built-in patterns for this package type.
    "generic": [],
}
|
||||
|
||||
|
||||
class ConfigManager:
|
||||
def __init__(self, config_file: str = "remotes.yaml"):
|
||||
self.config_file = config_file
|
||||
@@ -111,6 +130,20 @@ class ConfigManager:
|
||||
db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}"
|
||||
return {"url": db_url}
|
||||
|
||||
def get_index_patterns(self, remote_name: str) -> list[str]:
    """Return index-file patterns for a remote.

    Merges the package-level defaults with any extra patterns listed under
    ``index_patterns`` in the remote's config.

    Args:
        remote_name: Name of the remote, resolved through
            ``get_remote_config``.

    Returns:
        A new list holding the defaults for the remote's ``package``
        (``"generic"`` when the key is absent) followed by each extra
        pattern that is not already present. Empty when the remote has no
        configuration.
    """
    remote_config = self.get_remote_config(remote_name)
    if not remote_config:
        return []

    package = remote_config.get("package", "generic")
    defaults = _PACKAGE_INDEX_PATTERNS.get(package, [])

    # An ``index_patterns:`` key left empty in YAML loads as None, and
    # ``.get(key, [])`` only falls back when the key is missing entirely.
    extra = remote_config.get("index_patterns") or []
    if isinstance(extra, str):
        # A single scalar pattern would otherwise be split into characters.
        extra = [extra]

    # Copy so the shared module-level defaults are never handed out directly.
    merged = list(defaults)
    for pattern in extra:
        if pattern not in merged:
            merged.append(pattern)
    return merged
|
||||
|
||||
def get_cache_config(self, remote_name: str) -> dict:
|
||||
"""Get cache configuration for a specific remote"""
|
||||
remote_config = self.get_remote_config(remote_name)
|
||||
|
||||
@@ -179,7 +179,8 @@ async def check_artifact_patterns(
|
||||
remote_name: str, repo_path: str, file_path: str, full_path: str
|
||||
) -> bool:
|
||||
# First check if this is an index file - always allow index files
|
||||
if cache.is_index_file(file_path) or cache.is_index_file(full_path):
|
||||
index_patterns = config.get_index_patterns(remote_name)
|
||||
if cache.is_index_file(file_path, index_patterns) or cache.is_index_file(full_path, index_patterns):
|
||||
return True
|
||||
|
||||
# Then check basic include patterns
|
||||
@@ -319,7 +320,7 @@ async def get_artifact(remote_name: str, path: str):
|
||||
|
||||
# For index files, check Redis TTL validity
|
||||
filename = os.path.basename(path)
|
||||
is_index = cache.is_index_file(path) # Check full path, not just filename
|
||||
is_index = cache.is_index_file(path, config.get_index_patterns(remote_name))
|
||||
|
||||
if cached_key and is_index:
|
||||
# Index file exists, but check if it's still valid
|
||||
@@ -467,7 +468,7 @@ async def docker_v2_proxy(request: Request, remote_name: str, path: str):
|
||||
if not storage.exists(cached_key):
|
||||
cached_key = None
|
||||
|
||||
is_index = cache.is_index_file(path)
|
||||
is_index = cache.is_index_file(path, config.get_index_patterns(remote_name))
|
||||
|
||||
if cached_key and is_index:
|
||||
if not cache.is_index_valid(remote_name, path):
|
||||
|
||||
Reference in New Issue
Block a user