feat: index caching

- improve index detection for rpms
- improve logging
Ben Vincent 2026-01-13 18:13:47 +11:00
parent f40675f3d2
commit 9defc78e21
4 changed files with 30 additions and 5 deletions

View File

@@ -37,6 +37,7 @@ RUN uv sync --frozen
 # Copy application source
 COPY --chown=appuser:appuser src/ ./src/
 COPY --chown=appuser:appuser remotes.yaml ./
+COPY --chown=appuser:appuser ca-bundle.pem ./

 # Expose port
 EXPOSE 8000
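
The new COPY bakes a private CA bundle into the image next to the application. Nothing in this commit shows how the bundle is consumed; one plausible wiring (an assumption, not confirmed by the diff) is handing it to the httpx client used for upstream fetches:

# Hypothetical wiring, not shown in this commit: httpx accepts a CA bundle
# path via verify=, so upstream TLS fetches trust the bundled roots.
import httpx

client = httpx.AsyncClient(verify="ca-bundle.pem")  # path as copied in the Dockerfile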

View File

@ -1,6 +1,6 @@
[project] [project]
name = "artifactapi" name = "artifactapi"
version = "2.0.0" version = "2.0.1"
description = "Generic artifact caching system with support for various package managers" description = "Generic artifact caching system with support for various package managers"
dependencies = [ dependencies = [

View File

@@ -23,8 +23,13 @@ class RedisCache:
             file_path.endswith("APKINDEX.tar.gz")
             or file_path.endswith("Packages.gz")
             or file_path.endswith("repomd.xml")
-            or "repodata/" in file_path
-            and file_path.endswith((".xml", ".xml.gz", ".xml.bz2", ".xml.xz", ".xml.zck", ".xml.zst"))
+            or ("repodata/" in file_path
+                and file_path.endswith((
+                    ".xml", ".xml.gz", ".xml.bz2", ".xml.xz", ".xml.zck", ".xml.zst",
+                    ".sqlite", ".sqlite.gz", ".sqlite.bz2", ".sqlite.xz", ".sqlite.zck", ".sqlite.zst",
+                    ".yaml.xz", ".yaml.gz", ".yaml.bz2", ".yaml.zst",
+                    ".asc", ".txt"
+                )))
         )

     def get_index_cache_key(self, remote_name: str, path: str) -> str:
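
The wider suffix list is what fixes rpm index detection: yum/dnf repodata ships primary/filelists/other metadata as sqlite databases, zchunk (.zck) and zstd variants, comps group data as yaml, plus .asc signatures, and all of these must be classified as indexes so they expire with the index TTL instead of being cached forever like packages. A standalone sketch of the same predicate (is_index_file is a hypothetical name; the diff shows only the expression body):

# Minimal sketch of the detection logic from the diff, as a free function.
REPODATA_SUFFIXES = (
    ".xml", ".xml.gz", ".xml.bz2", ".xml.xz", ".xml.zck", ".xml.zst",
    ".sqlite", ".sqlite.gz", ".sqlite.bz2", ".sqlite.xz", ".sqlite.zck", ".sqlite.zst",
    ".yaml.xz", ".yaml.gz", ".yaml.bz2", ".yaml.zst",
    ".asc", ".txt",
)

def is_index_file(file_path: str) -> bool:  # hypothetical helper name
    return (
        file_path.endswith("APKINDEX.tar.gz")   # Alpine apk index
        or file_path.endswith("Packages.gz")    # Debian/apt index
        or file_path.endswith("repomd.xml")     # rpm repo entry point
        or ("repodata/" in file_path and file_path.endswith(REPODATA_SUFFIXES))
    )

assert is_index_file("el9/repodata/01ab-primary.sqlite.zst")  # rpm metadata: index
assert not is_index_file("el9/Packages/httpd-2.4.62.rpm")     # package: cache normally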

View File

@ -1,6 +1,7 @@
import os import os
import re import re
import hashlib import hashlib
import logging
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
import httpx import httpx
from fastapi import FastAPI, HTTPException, Response, Query, File, UploadFile from fastapi import FastAPI, HTTPException, Response, Query, File, UploadFile
@@ -20,7 +21,14 @@ class ArtifactRequest(BaseModel):
     include_pattern: str

-app = FastAPI(title="Artifact Storage API", version="2.0.0")
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+app = FastAPI(title="Artifact Storage API", version="2.0.1")

 # Initialize components using config
 config_path = os.environ.get("CONFIG_PATH")
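
With this format every record carries a timestamp, logger name, and level, so the cache HIT/MISS lines added below are easy to grep. A hit would render roughly like this (values illustrative; the logger name depends on the module's __name__):

2026-01-13 18:13:47,512 - main - INFO - Cache HIT: alpine/v3.20/main/x86_64/APKINDEX.tar.gz (size: 1432 bytes, key: ...)

Note that logging.basicConfig is a no-op when the root logger already has handlers, so under a server that pre-configures logging the output format may come from the server's config instead.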
@@ -45,7 +53,7 @@ def read_root():
     config._check_reload()
     return {
         "message": "Artifact Storage API",
-        "version": "2.0.0",
+        "version": app.version,
         "remotes": list(config.config.get("remotes", {}).keys()),
     }
@@ -105,6 +113,7 @@ async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
     key = storage.get_object_key_from_path(remote_name, path)
     if storage.exists(key):
+        logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})")
         return {
             "url": url,
             "cached_url": storage.get_url(key),
@@ -118,6 +127,8 @@ async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
     storage_path = storage.upload(key, response.content)
+
+    logger.info(f"Cache ADD SUCCESS: {url} (size: {len(response.content)} bytes, key: {key})")
     return {
         "url": url,
         "cached_url": storage.get_url(key),
@@ -173,6 +184,7 @@ async def get_artifact(remote_name: str, path: str):
     # Check if artifact matches configured patterns
     if not await check_artifact_patterns(remote_name, repo_path, file_path, path):
+        logger.info(f"PATTERN BLOCKED: {remote_name}/{path} - not matching include patterns")
         raise HTTPException(
             status_code=403, detail="Artifact not allowed by configuration patterns"
         )
@@ -198,6 +210,7 @@ async def get_artifact(remote_name: str, path: str):
         # Index file exists, but check if it's still valid
         if not cache.is_index_valid(remote_name, path):
             # Index has expired, remove it from S3
+            logger.info(f"Index EXPIRED: {remote_name}/{path} - removing from cache")
             cache.cleanup_expired_index(storage, remote_name, path)
             cached_key = None  # Force re-download
@@ -207,6 +220,9 @@ async def get_artifact(remote_name: str, path: str):
         artifact_data = storage.download_object(cached_key)
         filename = os.path.basename(path)

+        # Log cache hit
+        logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})")
+
         # Determine content type based on file extension
         content_type = "application/octet-stream"
         if filename.endswith(".tar.gz"):
@@ -245,9 +261,11 @@ async def get_artifact(remote_name: str, path: str):
         )

     # Artifact not cached, cache it first
+    logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
     result = await cache_single_artifact(remote_url, remote_name, path)

     if result["status"] == "error":
+        logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}")
         raise HTTPException(
             status_code=502, detail=f"Failed to fetch artifact: {result['error']}"
         )
@@ -258,6 +276,7 @@ async def get_artifact(remote_name: str, path: str):
         cache_config = config.get_cache_config(remote_name)
         index_ttl = cache_config.get("index_ttl", 300)  # Default 5 minutes
         cache.mark_index_cached(remote_name, path, index_ttl)
+        logger.info(f"Index file cached with TTL: {remote_name}/{path} (ttl: {index_ttl}s)")

     # Now return the cached artifact
     try:
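
Taken together, mark_index_cached, is_index_valid, and cleanup_expired_index implement TTL-based index invalidation: package blobs stay in S3 indefinitely, while index files are re-fetched once their Redis marker expires. The RedisCache internals are not part of this commit; a minimal sketch consistent with the calls above, assuming a plain redis-py client and the get_index_cache_key method seen in the cache diff:

import redis

r = redis.Redis()  # connection details assumed

def get_index_cache_key(remote_name: str, path: str) -> str:
    # Key layout is an assumption; only the method name appears in the diff.
    return f"index:{remote_name}:{path}"

def mark_index_cached(remote_name: str, path: str, ttl: int) -> None:
    # Sentinel key that Redis expires after ttl seconds.
    r.setex(get_index_cache_key(remote_name, path), ttl, "1")

def is_index_valid(remote_name: str, path: str) -> bool:
    # The cached index is fresh while the sentinel still exists.
    return r.exists(get_index_cache_key(remote_name, path)) == 1

On expiry the handler deletes the stale index object from S3 and falls through to the cache-miss path, which is why an "Index EXPIRED" log line is immediately followed by a "Cache MISS" and a fresh "Index file cached with TTL" line.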