From 0df726467a56b3b898492dacb3da87a523fed2ec Mon Sep 17 00:00:00 2001 From: Ben Vincent Date: Tue, 28 Apr 2026 22:09:58 +1000 Subject: [PATCH] refactor: split cache, database, and remote logic into submodules cache/redis.py, database/postgres.py, and remote/{base,generic,helm,npm,python,rpm}.py replace the flat modules. All public symbols re-exported from their package __init__.py for backwards compatibility. No functional changes; all 187 tests pass. Closes #19 --- README.md | 25 +++++++ src/artifactapi/cache/__init__.py | 3 + src/artifactapi/{cache.py => cache/redis.py} | 11 +--- src/artifactapi/database/__init__.py | 3 + .../{database.py => database/postgres.py} | 12 ---- src/artifactapi/main.py | 65 +++++-------------- src/artifactapi/remote/__init__.py | 4 ++ src/artifactapi/remote/base.py | 16 +++++ src/artifactapi/remote/generic.py | 3 + src/artifactapi/remote/helm.py | 18 +++++ src/artifactapi/remote/npm.py | 21 ++++++ src/artifactapi/remote/python.py | 32 +++++++++ src/artifactapi/remote/rpm.py | 3 + 13 files changed, 145 insertions(+), 71 deletions(-) create mode 100644 src/artifactapi/cache/__init__.py rename src/artifactapi/{cache.py => cache/redis.py} (87%) create mode 100644 src/artifactapi/database/__init__.py rename src/artifactapi/{database.py => database/postgres.py} (93%) create mode 100644 src/artifactapi/remote/__init__.py create mode 100644 src/artifactapi/remote/base.py create mode 100644 src/artifactapi/remote/generic.py create mode 100644 src/artifactapi/remote/helm.py create mode 100644 src/artifactapi/remote/npm.py create mode 100644 src/artifactapi/remote/python.py create mode 100644 src/artifactapi/remote/rpm.py diff --git a/README.md b/README.md index bf27022..0ea4174 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,31 @@ client → /api/v1/remote/{remote}/{path} Docker Registry traffic uses the `/v2/{remote}/{path}` endpoint implementing the Docker Registry HTTP API v2. +### Code layout + +``` +src/artifactapi/ +├── main.py — FastAPI app, route handlers +├── config.py — ConfigManager (loads remotes.yaml) +├── storage.py — S3Storage (MinIO/S3 abstraction) +├── docker_auth.py — Docker Bearer token fetching +├── metrics.py — Prometheus + Redis metrics +├── cache/ +│ ├── __init__.py — re-exports RedisCache +│ └── redis.py — RedisCache (TTL keys, ETag metadata) +├── database/ +│ ├── __init__.py — re-exports DatabaseManager +│ └── postgres.py — DatabaseManager (artifact + local-file tables) +└── remote/ + ├── __init__.py + ├── base.py — content-type detection + ├── generic.py — generic HTTP remotes + ├── helm.py — Helm index.yaml URL rewriting + ├── npm.py — npm metadata URL rewriting + ├── python.py — PyPI URL construction + HTML rewriting + └── rpm.py — RPM remotes +``` + ## API Endpoints | Method | Path | Description | diff --git a/src/artifactapi/cache/__init__.py b/src/artifactapi/cache/__init__.py new file mode 100644 index 0000000..7f06ae6 --- /dev/null +++ b/src/artifactapi/cache/__init__.py @@ -0,0 +1,3 @@ +from .redis import RedisCache + +__all__ = ["RedisCache"] diff --git a/src/artifactapi/cache.py b/src/artifactapi/cache/redis.py similarity index 87% rename from src/artifactapi/cache.py rename to src/artifactapi/cache/redis.py index 9e3940b..9d22a3b 100644 --- a/src/artifactapi/cache.py +++ b/src/artifactapi/cache/redis.py @@ -11,7 +11,6 @@ class RedisCache: try: self.client = redis.from_url(self.redis_url, decode_responses=True) - # Test connection self.client.ping() self.available = True except Exception as e: @@ -20,7 +19,6 @@ class RedisCache: self.available = False def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool: - """Return True if file_path matches any of the mutable patterns.""" if patterns is None: patterns = [] return any(re.search(p, file_path) for p in patterns) @@ -32,10 +30,8 @@ class RedisCache: return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}" def is_index_valid(self, remote_name: str, path: str) -> bool: - """Check if mutable file is still within its TTL window.""" if not self.available: return False - try: key = self.get_index_cache_key(remote_name, path) return self.client.exists(key) > 0 @@ -43,10 +39,8 @@ class RedisCache: return False def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None: - """Set or refresh the TTL key for a mutable file.""" if not self.available: return - try: key = self.get_index_cache_key(remote_name, path) self.client.setex(key, ttl, str(int(time.time()))) @@ -54,7 +48,6 @@ class RedisCache: pass def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None: - """Persist ETag and Last-Modified for future conditional requests.""" if not self.available: return data = {} @@ -70,7 +63,6 @@ class RedisCache: pass def get_mutable_meta(self, remote_name: str, path: str) -> dict: - """Return stored ETag/Last-Modified for a mutable file, or {}.""" if not self.available: return {} try: @@ -87,14 +79,13 @@ class RedisCache: pass def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None: - """Remove an expired mutable file from S3 and clear its Redis meta.""" if not self.available: return try: import os - from .config import ConfigManager + from ..config import ConfigManager config_path = os.environ.get("CONFIG_PATH") if config_path: diff --git a/src/artifactapi/database/__init__.py b/src/artifactapi/database/__init__.py new file mode 100644 index 0000000..2a4fd1c --- /dev/null +++ b/src/artifactapi/database/__init__.py @@ -0,0 +1,3 @@ +from .postgres import DatabaseManager + +__all__ = ["DatabaseManager"] diff --git a/src/artifactapi/database.py b/src/artifactapi/database/postgres.py similarity index 93% rename from src/artifactapi/database.py rename to src/artifactapi/database/postgres.py index c8e462a..733c131 100644 --- a/src/artifactapi/database.py +++ b/src/artifactapi/database/postgres.py @@ -9,7 +9,6 @@ class DatabaseManager: self._init_database() def _init_database(self): - """Initialize database connection and create schema if needed""" try: self.connection = psycopg2.connect(self.db_url) self.connection.autocommit = True @@ -21,10 +20,8 @@ class DatabaseManager: self.available = False def _create_schema(self): - """Create tables if they don't exist""" try: with self.connection.cursor() as cursor: - # Create table to map S3 keys to remote names cursor.execute(""" CREATE TABLE IF NOT EXISTS artifact_mappings ( id SERIAL PRIMARY KEY, @@ -51,7 +48,6 @@ class DatabaseManager: ) """) - # Create indexes separately cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)") @@ -61,7 +57,6 @@ class DatabaseManager: print(f"Error creating schema: {e}") def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int): - """Record mapping between S3 key and remote""" if not self.available: return @@ -83,7 +78,6 @@ class DatabaseManager: print(f"Error recording artifact mapping: {e}") def get_storage_by_remote(self) -> dict[str, int]: - """Get storage size breakdown by remote from database""" if not self.available: return {} @@ -101,7 +95,6 @@ class DatabaseManager: return {} def get_remote_for_s3_key(self, s3_key: str) -> str | None: - """Get remote name for given S3 key""" if not self.available: return None @@ -126,7 +119,6 @@ class DatabaseManager: sha256_sum: str, content_type: str = None, ): - """Add a file to local repository""" if not self.available: return False @@ -153,7 +145,6 @@ class DatabaseManager: return False def get_local_file_metadata(self, repository_name: str, file_path: str): - """Get metadata for a local file""" if not self.available: return None @@ -185,7 +176,6 @@ class DatabaseManager: return None def list_local_files(self, repository_name: str, prefix: str = ""): - """List files in local repository with optional path prefix""" if not self.available: return [] @@ -229,7 +219,6 @@ class DatabaseManager: return [] def delete_local_file(self, repository_name: str, file_path: str): - """Delete a file from local repository""" if not self.available: return False @@ -251,7 +240,6 @@ class DatabaseManager: return None def file_exists(self, repository_name: str, file_path: str): - """Check if file exists in local repository""" if not self.available: return False diff --git a/src/artifactapi/main.py b/src/artifactapi/main.py index 8816249..911bc6e 100644 --- a/src/artifactapi/main.py +++ b/src/artifactapi/main.py @@ -25,6 +25,10 @@ from .config import ConfigManager from .database import DatabaseManager from .docker_auth import get_docker_token_for_response from .metrics import MetricsManager +from .remote import helm as _helm +from .remote import npm as _npm +from .remote import python as _pypi +from .remote.base import get_content_type as _get_content_type from .storage import S3Storage @@ -163,9 +167,8 @@ async def construct_remote_url(remote_name: str, path: str) -> str: if remote_config.get("package") == "docker": return f"{base_url}/v2/{path}" - # PyPI splits index and files across two hosts; redirect simple/ requests to pypi.org - if remote_config.get("package") == "pypi" and base_url.rstrip("/") == "https://files.pythonhosted.org" and "simple/" in path: - return f"https://pypi.org/{path}" + if remote_config.get("package") == "pypi": + return _pypi.construct_url(base_url, path) return f"{base_url}/{path}" @@ -337,24 +340,6 @@ async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) - return False -def _get_content_type(filename: str) -> str: - if filename.endswith((".tar.gz", ".tgz")): - return "application/gzip" - if filename.endswith(".zip") or filename.endswith(".whl"): - return "application/zip" - if filename.endswith(".exe"): - return "application/x-msdownload" - if filename.endswith(".rpm"): - return "application/x-rpm" - if filename.endswith(".xml"): - return "application/xml" - if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")): - return "application/gzip" - if filename.endswith((".yaml", ".yml")): - return "text/yaml" - return "application/octet-stream" - - def _resolve_content( data: bytes, path: str, @@ -364,34 +349,16 @@ def _resolve_content( remote_name: str = "", ) -> tuple[bytes, str]: """Return (possibly-rewritten data, content_type) for a cached artifact.""" - if remote_config.get("package") == "pypi": - immutable = remote_config.get("immutable_patterns", []) - if not any(re.search(p, path) for p in immutable): - proxy_base = str(request.base_url).rstrip("/") - base_url = remote_config.get("base_url", "").rstrip("/") - data = data.replace( - base_url.encode(), - f"{proxy_base}/api/v1/remote/{remote_name}".encode(), - ) - return data, "text/html; charset=utf-8" - if remote_config.get("package") == "npm": - immutable = remote_config.get("immutable_patterns", []) - if not any(re.search(p, path) for p in immutable): - proxy_base = str(request.base_url).rstrip("/") - base_url = remote_config.get("base_url", "").rstrip("/") - data = data.replace( - base_url.encode(), - f"{proxy_base}/api/v1/remote/{remote_name}".encode(), - ) - return data, "application/json" - if remote_config.get("package") == "helm" and filename == "index.yaml": - proxy_base = str(request.base_url).rstrip("/") - base_url = remote_config.get("base_url", "").rstrip("/") - data = data.replace( - base_url.encode(), - f"{proxy_base}/api/v1/remote/{remote_name}".encode(), - ) - return data, "text/yaml" + package = remote_config.get("package") + proxy_base = str(request.base_url).rstrip("/") + base_url = remote_config.get("base_url", "").rstrip("/") + + if package == "pypi": + return _pypi.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name) + if package == "npm": + return _npm.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name) + if package == "helm": + return _helm.resolve_content(data, path, filename, base_url, proxy_base, remote_name) return data, _get_content_type(filename) diff --git a/src/artifactapi/remote/__init__.py b/src/artifactapi/remote/__init__.py new file mode 100644 index 0000000..96efd4d --- /dev/null +++ b/src/artifactapi/remote/__init__.py @@ -0,0 +1,4 @@ +from . import generic, helm, npm, python, rpm +from .base import get_content_type + +__all__ = ["generic", "helm", "npm", "python", "rpm", "get_content_type"] diff --git a/src/artifactapi/remote/base.py b/src/artifactapi/remote/base.py new file mode 100644 index 0000000..ce5f523 --- /dev/null +++ b/src/artifactapi/remote/base.py @@ -0,0 +1,16 @@ +def get_content_type(filename: str) -> str: + if filename.endswith((".tar.gz", ".tgz")): + return "application/gzip" + if filename.endswith(".zip") or filename.endswith(".whl"): + return "application/zip" + if filename.endswith(".exe"): + return "application/x-msdownload" + if filename.endswith(".rpm"): + return "application/x-rpm" + if filename.endswith(".xml"): + return "application/xml" + if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")): + return "application/gzip" + if filename.endswith((".yaml", ".yml")): + return "text/yaml" + return "application/octet-stream" diff --git a/src/artifactapi/remote/generic.py b/src/artifactapi/remote/generic.py new file mode 100644 index 0000000..3a41962 --- /dev/null +++ b/src/artifactapi/remote/generic.py @@ -0,0 +1,3 @@ +from .base import get_content_type + +__all__ = ["get_content_type"] diff --git a/src/artifactapi/remote/helm.py b/src/artifactapi/remote/helm.py new file mode 100644 index 0000000..dc0aa79 --- /dev/null +++ b/src/artifactapi/remote/helm.py @@ -0,0 +1,18 @@ +from .base import get_content_type + + +def resolve_content( + data: bytes, + path: str, + filename: str, + base_url: str, + proxy_url: str, + remote_name: str, +) -> tuple[bytes, str]: + if filename == "index.yaml": + data = data.replace( + base_url.encode(), + f"{proxy_url}/api/v1/remote/{remote_name}".encode(), + ) + return data, "text/yaml" + return data, get_content_type(filename) diff --git a/src/artifactapi/remote/npm.py b/src/artifactapi/remote/npm.py new file mode 100644 index 0000000..3547b2d --- /dev/null +++ b/src/artifactapi/remote/npm.py @@ -0,0 +1,21 @@ +import re + +from .base import get_content_type + + +def resolve_content( + data: bytes, + path: str, + filename: str, + immutable_patterns: list[str], + base_url: str, + proxy_url: str, + remote_name: str, +) -> tuple[bytes, str]: + if not any(re.search(p, path) for p in immutable_patterns): + data = data.replace( + base_url.encode(), + f"{proxy_url}/api/v1/remote/{remote_name}".encode(), + ) + return data, "application/json" + return data, get_content_type(filename) diff --git a/src/artifactapi/remote/python.py b/src/artifactapi/remote/python.py new file mode 100644 index 0000000..bed8d2d --- /dev/null +++ b/src/artifactapi/remote/python.py @@ -0,0 +1,32 @@ +import re + +from .base import get_content_type + + +def construct_url(base_url: str, path: str) -> str: + """Build the upstream URL for a PyPI request. + + PyPI splits simple/ index pages (pypi.org) from file downloads + (files.pythonhosted.org), so simple/ requests are redirected to pypi.org. + """ + if base_url.rstrip("/") == "https://files.pythonhosted.org" and "simple/" in path: + return f"https://pypi.org/{path}" + return f"{base_url}/{path}" + + +def resolve_content( + data: bytes, + path: str, + filename: str, + immutable_patterns: list[str], + base_url: str, + proxy_url: str, + remote_name: str, +) -> tuple[bytes, str]: + if not any(re.search(p, path) for p in immutable_patterns): + data = data.replace( + base_url.encode(), + f"{proxy_url}/api/v1/remote/{remote_name}".encode(), + ) + return data, "text/html; charset=utf-8" + return data, get_content_type(filename) diff --git a/src/artifactapi/remote/rpm.py b/src/artifactapi/remote/rpm.py new file mode 100644 index 0000000..3a41962 --- /dev/null +++ b/src/artifactapi/remote/rpm.py @@ -0,0 +1,3 @@ +from .base import get_content_type + +__all__ = ["get_content_type"]