diff --git a/src/artifactapi/cache.py b/src/artifactapi/cache.py index 1c37081..dac16ef 100644 --- a/src/artifactapi/cache.py +++ b/src/artifactapi/cache.py @@ -1,3 +1,4 @@ +import re import time import hashlib import redis @@ -17,27 +18,11 @@ class RedisCache: self.client = None self.available = False - def is_index_file(self, file_path: str) -> bool: - """Check if the file is an index file that should have TTL""" - return ( - file_path.endswith("APKINDEX.tar.gz") - or file_path.endswith("Packages.gz") - or file_path.endswith("repomd.xml") - or ("repodata/" in file_path - and file_path.endswith(( - ".xml", ".xml.gz", ".xml.bz2", ".xml.xz", ".xml.zck", ".xml.zst", - ".sqlite", ".sqlite.gz", ".sqlite.bz2", ".sqlite.xz", ".sqlite.zck", ".sqlite.zst", - ".yaml.xz", ".yaml.gz", ".yaml.bz2", ".yaml.zst", - ".asc", ".txt" - ))) - # Docker tag-based manifests are mutable (index); digest-pinned are immutable (file) - or ( - "/manifests/" in file_path - and not file_path.split("/manifests/", 1)[1].startswith("sha256:") - ) - or "/tags/list" in file_path - or file_path.endswith("/tags/list") - ) + def is_index_file(self, file_path: str, patterns: list[str] | None = None) -> bool: + """Return True if file_path matches any of the index patterns.""" + if patterns is None: + patterns = [] + return any(re.search(p, file_path) for p in patterns) def get_index_cache_key(self, remote_name: str, path: str) -> str: """Generate cache key for index files""" diff --git a/src/artifactapi/config.py b/src/artifactapi/config.py index b316ab4..88d73fb 100644 --- a/src/artifactapi/config.py +++ b/src/artifactapi/config.py @@ -4,6 +4,25 @@ import yaml from typing import Optional +_PACKAGE_INDEX_PATTERNS: dict[str, list[str]] = { + "alpine": [ + r"APKINDEX\.tar\.gz$", + ], + "rpm": [ + r"repomd\.xml$", + r"repodata/.*\.(xml|xml\.gz|xml\.bz2|xml\.xz|xml\.zck|xml\.zst" + r"|sqlite|sqlite\.gz|sqlite\.bz2|sqlite\.xz|sqlite\.zck|sqlite\.zst" + r"|yaml\.xz|yaml\.gz|yaml\.bz2|yaml\.zst|asc|txt)$", + r"Packages\.gz$", + ], + "docker": [ + r"/manifests/(?!sha256:)[^/]+$", + r"/tags/list$", + ], + "generic": [], +} + + class ConfigManager: def __init__(self, config_file: str = "remotes.yaml"): self.config_file = config_file @@ -111,6 +130,20 @@ class ConfigManager: db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}" return {"url": db_url} + def get_index_patterns(self, remote_name: str) -> list[str]: + """Return index-file patterns for a remote. + + Merges the package-level defaults with any extra patterns listed under + ``index_patterns`` in the remote's config. + """ + remote_config = self.get_remote_config(remote_name) + if not remote_config: + return [] + package = remote_config.get("package", "generic") + defaults = _PACKAGE_INDEX_PATTERNS.get(package, []) + extra = remote_config.get("index_patterns", []) + return defaults + [p for p in extra if p not in defaults] + def get_cache_config(self, remote_name: str) -> dict: """Get cache configuration for a specific remote""" remote_config = self.get_remote_config(remote_name) diff --git a/src/artifactapi/main.py b/src/artifactapi/main.py index 832a6d7..e288b46 100644 --- a/src/artifactapi/main.py +++ b/src/artifactapi/main.py @@ -179,7 +179,8 @@ async def check_artifact_patterns( remote_name: str, repo_path: str, file_path: str, full_path: str ) -> bool: # First check if this is an index file - always allow index files - if cache.is_index_file(file_path) or cache.is_index_file(full_path): + index_patterns = config.get_index_patterns(remote_name) + if cache.is_index_file(file_path, index_patterns) or cache.is_index_file(full_path, index_patterns): return True # Then check basic include patterns @@ -319,7 +320,7 @@ async def get_artifact(remote_name: str, path: str): # For index files, check Redis TTL validity filename = os.path.basename(path) - is_index = cache.is_index_file(path) # Check full path, not just filename + is_index = cache.is_index_file(path, config.get_index_patterns(remote_name)) if cached_key and is_index: # Index file exists, but check if it's still valid @@ -467,7 +468,7 @@ async def docker_v2_proxy(request: Request, remote_name: str, path: str): if not storage.exists(cached_key): cached_key = None - is_index = cache.is_index_file(path) + is_index = cache.is_index_file(path, config.get_index_patterns(remote_name)) if cached_key and is_index: if not cache.is_index_valid(remote_name, path):