refactor: split cache, database, and remote logic into submodules

cache/redis.py, database/postgres.py, and remote/{base,generic,helm,npm,python,rpm}.py
replace the flat modules. All public symbols are re-exported from their package's
__init__.py for backwards compatibility. No functional changes; all 187 tests pass.

Closes #19
This commit is contained in:
2026-04-28 22:09:58 +10:00
parent b8bc7f8714
commit 0df726467a
13 changed files with 145 additions and 71 deletions
+25
View File
@@ -27,6 +27,31 @@ client → /api/v1/remote/{remote}/{path}
Docker Registry traffic uses the `/v2/{remote}/{path}` endpoint implementing the Docker Registry HTTP API v2.
### Code layout
```
src/artifactapi/
├── main.py — FastAPI app, route handlers
├── config.py — ConfigManager (loads remotes.yaml)
├── storage.py — S3Storage (MinIO/S3 abstraction)
├── docker_auth.py — Docker Bearer token fetching
├── metrics.py — Prometheus + Redis metrics
├── cache/
│   ├── __init__.py — re-exports RedisCache
│   └── redis.py — RedisCache (TTL keys, ETag metadata)
├── database/
│   ├── __init__.py — re-exports DatabaseManager
│   └── postgres.py — DatabaseManager (artifact + local-file tables)
└── remote/
    ├── __init__.py
    ├── base.py — content-type detection
    ├── generic.py — generic HTTP remotes
    ├── helm.py — Helm index.yaml URL rewriting
    ├── npm.py — npm metadata URL rewriting
    ├── python.py — PyPI URL construction + HTML rewriting
    └── rpm.py — RPM remotes
```
## API Endpoints
| Method | Path | Description |
+3
View File
@@ -0,0 +1,3 @@
from .redis import RedisCache
__all__ = ["RedisCache"]
+1 -10
View File
@@ -11,7 +11,6 @@ class RedisCache:
try:
self.client = redis.from_url(self.redis_url, decode_responses=True)
# Test connection
self.client.ping()
self.available = True
except Exception as e:
@@ -20,7 +19,6 @@ class RedisCache:
self.available = False
def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
"""Return True if file_path matches any of the mutable patterns."""
if patterns is None:
patterns = []
return any(re.search(p, file_path) for p in patterns)
@@ -32,10 +30,8 @@ class RedisCache:
return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
def is_index_valid(self, remote_name: str, path: str) -> bool:
"""Check if mutable file is still within its TTL window."""
if not self.available:
return False
try:
key = self.get_index_cache_key(remote_name, path)
return self.client.exists(key) > 0
@@ -43,10 +39,8 @@ class RedisCache:
return False
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
"""Set or refresh the TTL key for a mutable file."""
if not self.available:
return
try:
key = self.get_index_cache_key(remote_name, path)
self.client.setex(key, ttl, str(int(time.time())))
@@ -54,7 +48,6 @@ class RedisCache:
pass
def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None:
"""Persist ETag and Last-Modified for future conditional requests."""
if not self.available:
return
data = {}
@@ -70,7 +63,6 @@ class RedisCache:
pass
def get_mutable_meta(self, remote_name: str, path: str) -> dict:
"""Return stored ETag/Last-Modified for a mutable file, or {}."""
if not self.available:
return {}
try:
@@ -87,14 +79,13 @@ class RedisCache:
pass
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
"""Remove an expired mutable file from S3 and clear its Redis meta."""
if not self.available:
return
try:
import os
from .config import ConfigManager
from ..config import ConfigManager
config_path = os.environ.get("CONFIG_PATH")
if config_path:
+3
View File
@@ -0,0 +1,3 @@
from .postgres import DatabaseManager
__all__ = ["DatabaseManager"]
@@ -9,7 +9,6 @@ class DatabaseManager:
self._init_database()
def _init_database(self):
"""Initialize database connection and create schema if needed"""
try:
self.connection = psycopg2.connect(self.db_url)
self.connection.autocommit = True
@@ -21,10 +20,8 @@ class DatabaseManager:
self.available = False
def _create_schema(self):
"""Create tables if they don't exist"""
try:
with self.connection.cursor() as cursor:
# Create table to map S3 keys to remote names
cursor.execute("""
CREATE TABLE IF NOT EXISTS artifact_mappings (
id SERIAL PRIMARY KEY,
@@ -51,7 +48,6 @@ class DatabaseManager:
)
""")
# Create indexes separately
cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)")
@@ -61,7 +57,6 @@ class DatabaseManager:
print(f"Error creating schema: {e}")
def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int):
"""Record mapping between S3 key and remote"""
if not self.available:
return
@@ -83,7 +78,6 @@ class DatabaseManager:
print(f"Error recording artifact mapping: {e}")
def get_storage_by_remote(self) -> dict[str, int]:
"""Get storage size breakdown by remote from database"""
if not self.available:
return {}
@@ -101,7 +95,6 @@ class DatabaseManager:
return {}
def get_remote_for_s3_key(self, s3_key: str) -> str | None:
"""Get remote name for given S3 key"""
if not self.available:
return None
@@ -126,7 +119,6 @@ class DatabaseManager:
sha256_sum: str,
content_type: str = None,
):
"""Add a file to local repository"""
if not self.available:
return False
@@ -153,7 +145,6 @@ class DatabaseManager:
return False
def get_local_file_metadata(self, repository_name: str, file_path: str):
"""Get metadata for a local file"""
if not self.available:
return None
@@ -185,7 +176,6 @@ class DatabaseManager:
return None
def list_local_files(self, repository_name: str, prefix: str = ""):
"""List files in local repository with optional path prefix"""
if not self.available:
return []
@@ -229,7 +219,6 @@ class DatabaseManager:
return []
def delete_local_file(self, repository_name: str, file_path: str):
"""Delete a file from local repository"""
if not self.available:
return False
@@ -251,7 +240,6 @@ class DatabaseManager:
return None
def file_exists(self, repository_name: str, file_path: str):
"""Check if file exists in local repository"""
if not self.available:
return False
+16 -49
View File
@@ -25,6 +25,10 @@ from .config import ConfigManager
from .database import DatabaseManager
from .docker_auth import get_docker_token_for_response
from .metrics import MetricsManager
from .remote import helm as _helm
from .remote import npm as _npm
from .remote import python as _pypi
from .remote.base import get_content_type as _get_content_type
from .storage import S3Storage
@@ -163,9 +167,8 @@ async def construct_remote_url(remote_name: str, path: str) -> str:
if remote_config.get("package") == "docker":
return f"{base_url}/v2/{path}"
# PyPI splits index and files across two hosts; redirect simple/ requests to pypi.org
if remote_config.get("package") == "pypi" and base_url.rstrip("/") == "https://files.pythonhosted.org" and "simple/" in path:
return f"https://pypi.org/{path}"
if remote_config.get("package") == "pypi":
return _pypi.construct_url(base_url, path)
return f"{base_url}/{path}"
@@ -337,24 +340,6 @@ async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) -
return False
def _get_content_type(filename: str) -> str:
if filename.endswith((".tar.gz", ".tgz")):
return "application/gzip"
if filename.endswith(".zip") or filename.endswith(".whl"):
return "application/zip"
if filename.endswith(".exe"):
return "application/x-msdownload"
if filename.endswith(".rpm"):
return "application/x-rpm"
if filename.endswith(".xml"):
return "application/xml"
if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
return "application/gzip"
if filename.endswith((".yaml", ".yml")):
return "text/yaml"
return "application/octet-stream"
def _resolve_content(
data: bytes,
path: str,
@@ -364,34 +349,16 @@ def _resolve_content(
remote_name: str = "",
) -> tuple[bytes, str]:
"""Return (possibly-rewritten data, content_type) for a cached artifact."""
if remote_config.get("package") == "pypi":
immutable = remote_config.get("immutable_patterns", [])
if not any(re.search(p, path) for p in immutable):
proxy_base = str(request.base_url).rstrip("/")
base_url = remote_config.get("base_url", "").rstrip("/")
data = data.replace(
base_url.encode(),
f"{proxy_base}/api/v1/remote/{remote_name}".encode(),
)
return data, "text/html; charset=utf-8"
if remote_config.get("package") == "npm":
immutable = remote_config.get("immutable_patterns", [])
if not any(re.search(p, path) for p in immutable):
proxy_base = str(request.base_url).rstrip("/")
base_url = remote_config.get("base_url", "").rstrip("/")
data = data.replace(
base_url.encode(),
f"{proxy_base}/api/v1/remote/{remote_name}".encode(),
)
return data, "application/json"
if remote_config.get("package") == "helm" and filename == "index.yaml":
proxy_base = str(request.base_url).rstrip("/")
base_url = remote_config.get("base_url", "").rstrip("/")
data = data.replace(
base_url.encode(),
f"{proxy_base}/api/v1/remote/{remote_name}".encode(),
)
return data, "text/yaml"
package = remote_config.get("package")
proxy_base = str(request.base_url).rstrip("/")
base_url = remote_config.get("base_url", "").rstrip("/")
if package == "pypi":
return _pypi.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name)
if package == "npm":
return _npm.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name)
if package == "helm":
return _helm.resolve_content(data, path, filename, base_url, proxy_base, remote_name)
return data, _get_content_type(filename)
+4
View File
@@ -0,0 +1,4 @@
from . import generic, helm, npm, python, rpm
from .base import get_content_type
__all__ = ["generic", "helm", "npm", "python", "rpm", "get_content_type"]
+16
View File
@@ -0,0 +1,16 @@
# Ordered (suffixes, content type) rules; the first rule whose suffix matches wins.
_SUFFIX_CONTENT_TYPES: tuple[tuple[tuple[str, ...], str], ...] = (
    ((".tar.gz", ".tgz"), "application/gzip"),
    ((".zip", ".whl"), "application/zip"),
    ((".exe",), "application/x-msdownload"),
    ((".rpm",), "application/x-rpm"),
    ((".xml",), "application/xml"),
    ((".xml.gz", ".xml.bz2", ".xml.xz"), "application/gzip"),
    ((".yaml", ".yml"), "text/yaml"),
)


def get_content_type(filename: str) -> str:
    """Return the Content-Type to serve for *filename*, chosen by its suffix.

    Matching is a case-sensitive ``str.endswith`` check against a fixed rule
    table. Names that match no rule are reported as
    ``application/octet-stream``.
    """
    for suffixes, content_type in _SUFFIX_CONTENT_TYPES:
        if filename.endswith(suffixes):
            return content_type
    return "application/octet-stream"
+3
View File
@@ -0,0 +1,3 @@
from .base import get_content_type
__all__ = ["get_content_type"]
+18
View File
@@ -0,0 +1,18 @@
from .base import get_content_type
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Return (possibly rewritten data, content type) for a Helm artifact.

    When *filename* is ``index.yaml``, every occurrence of *base_url* in
    *data* is replaced with ``{proxy_url}/api/v1/remote/{remote_name}`` and
    the content type is ``text/yaml``. Any other file is returned unchanged
    with a content type derived from its name.

    *path* is accepted for signature parity with the other remote modules
    and is not used here.
    """
    if filename == "index.yaml":
        # An empty search pattern matches between every byte, so
        # bytes.replace(b"", x) would splice the proxy URL throughout the
        # payload. Only rewrite when there is a real upstream URL to find.
        if base_url:
            data = data.replace(
                base_url.encode(),
                f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
            )
        return data, "text/yaml"
    return data, get_content_type(filename)
+21
View File
@@ -0,0 +1,21 @@
import re
from .base import get_content_type
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    immutable_patterns: list[str],
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Return (possibly rewritten data, content type) for an npm artifact.

    If *path* matches none of the regexes in *immutable_patterns*, every
    occurrence of *base_url* in *data* is replaced with
    ``{proxy_url}/api/v1/remote/{remote_name}`` and the content type is
    ``application/json``. Paths that match an immutable pattern are returned
    unchanged with a content type derived from *filename*.
    """
    if not any(re.search(pattern, path) for pattern in immutable_patterns):
        # An empty search pattern matches between every byte, so
        # bytes.replace(b"", x) would splice the proxy URL throughout the
        # payload. Only rewrite when there is a real upstream URL to find.
        if base_url:
            data = data.replace(
                base_url.encode(),
                f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
            )
        return data, "application/json"
    return data, get_content_type(filename)
+32
View File
@@ -0,0 +1,32 @@
import re
from .base import get_content_type
def construct_url(base_url: str, path: str) -> str:
    """Build the upstream URL for a PyPI request.

    PyPI splits simple/ index pages (pypi.org) from file downloads
    (files.pythonhosted.org), so simple/ requests against the file host are
    redirected to pypi.org. Everything else is fetched from *base_url*.

    Trailing slashes on *base_url* are ignored, both for the host comparison
    and when joining with *path*, so the result never contains ``//`` between
    the base and the path.
    """
    upstream = base_url.rstrip("/")
    if upstream == "https://files.pythonhosted.org" and "simple/" in path:
        return f"https://pypi.org/{path}"
    return f"{upstream}/{path}"
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    immutable_patterns: list[str],
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Return (possibly rewritten data, content type) for a PyPI artifact.

    If *path* matches none of the regexes in *immutable_patterns*, every
    occurrence of *base_url* in *data* is replaced with
    ``{proxy_url}/api/v1/remote/{remote_name}`` and the content type is
    ``text/html; charset=utf-8``. Paths that match an immutable pattern are
    returned unchanged with a content type derived from *filename*.
    """
    if not any(re.search(pattern, path) for pattern in immutable_patterns):
        # An empty search pattern matches between every byte, so
        # bytes.replace(b"", x) would splice the proxy URL throughout the
        # payload. Only rewrite when there is a real upstream URL to find.
        if base_url:
            data = data.replace(
                base_url.encode(),
                f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
            )
        return data, "text/html; charset=utf-8"
    return data, get_content_type(filename)
+3
View File
@@ -0,0 +1,3 @@
from .base import get_content_type
__all__ = ["get_content_type"]