refactor: split cache, database, and remote logic into submodules
cache/redis.py, database/postgres.py, and remote/{base,generic,helm,npm,python,rpm}.py
replace the flat modules. All public symbols re-exported from their package
__init__.py for backwards compatibility. No functional changes; all 187 tests pass.
Closes #19
This commit is contained in:
@@ -27,6 +27,31 @@ client → /api/v1/remote/{remote}/{path}
|
||||
|
||||
Docker Registry traffic uses the `/v2/{remote}/{path}` endpoint implementing the Docker Registry HTTP API v2.
|
||||
|
||||
### Code layout
|
||||
|
||||
```
|
||||
src/artifactapi/
|
||||
├── main.py — FastAPI app, route handlers
|
||||
├── config.py — ConfigManager (loads remotes.yaml)
|
||||
├── storage.py — S3Storage (MinIO/S3 abstraction)
|
||||
├── docker_auth.py — Docker Bearer token fetching
|
||||
├── metrics.py — Prometheus + Redis metrics
|
||||
├── cache/
|
||||
│ ├── __init__.py — re-exports RedisCache
|
||||
│ └── redis.py — RedisCache (TTL keys, ETag metadata)
|
||||
├── database/
|
||||
│ ├── __init__.py — re-exports DatabaseManager
|
||||
│ └── postgres.py — DatabaseManager (artifact + local-file tables)
|
||||
└── remote/
|
||||
├── __init__.py
|
||||
├── base.py — content-type detection
|
||||
├── generic.py — generic HTTP remotes
|
||||
├── helm.py — Helm index.yaml URL rewriting
|
||||
├── npm.py — npm metadata URL rewriting
|
||||
├── python.py — PyPI URL construction + HTML rewriting
|
||||
└── rpm.py — RPM remotes
|
||||
```
|
||||
|
||||
## API Endpoints
|
||||
|
||||
| Method | Path | Description |
|
||||
|
||||
Vendored
+3
@@ -0,0 +1,3 @@
|
||||
from .redis import RedisCache
|
||||
|
||||
__all__ = ["RedisCache"]
|
||||
+1
-10
@@ -11,7 +11,6 @@ class RedisCache:
|
||||
|
||||
try:
|
||||
self.client = redis.from_url(self.redis_url, decode_responses=True)
|
||||
# Test connection
|
||||
self.client.ping()
|
||||
self.available = True
|
||||
except Exception as e:
|
||||
@@ -20,7 +19,6 @@ class RedisCache:
|
||||
self.available = False
|
||||
|
||||
def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
|
||||
"""Return True if file_path matches any of the mutable patterns."""
|
||||
if patterns is None:
|
||||
patterns = []
|
||||
return any(re.search(p, file_path) for p in patterns)
|
||||
@@ -32,10 +30,8 @@ class RedisCache:
|
||||
return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
||||
|
||||
def is_index_valid(self, remote_name: str, path: str) -> bool:
|
||||
"""Check if mutable file is still within its TTL window."""
|
||||
if not self.available:
|
||||
return False
|
||||
|
||||
try:
|
||||
key = self.get_index_cache_key(remote_name, path)
|
||||
return self.client.exists(key) > 0
|
||||
@@ -43,10 +39,8 @@ class RedisCache:
|
||||
return False
|
||||
|
||||
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
|
||||
"""Set or refresh the TTL key for a mutable file."""
|
||||
if not self.available:
|
||||
return
|
||||
|
||||
try:
|
||||
key = self.get_index_cache_key(remote_name, path)
|
||||
self.client.setex(key, ttl, str(int(time.time())))
|
||||
@@ -54,7 +48,6 @@ class RedisCache:
|
||||
pass
|
||||
|
||||
def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None:
|
||||
"""Persist ETag and Last-Modified for future conditional requests."""
|
||||
if not self.available:
|
||||
return
|
||||
data = {}
|
||||
@@ -70,7 +63,6 @@ class RedisCache:
|
||||
pass
|
||||
|
||||
def get_mutable_meta(self, remote_name: str, path: str) -> dict:
|
||||
"""Return stored ETag/Last-Modified for a mutable file, or {}."""
|
||||
if not self.available:
|
||||
return {}
|
||||
try:
|
||||
@@ -87,14 +79,13 @@ class RedisCache:
|
||||
pass
|
||||
|
||||
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
|
||||
"""Remove an expired mutable file from S3 and clear its Redis meta."""
|
||||
if not self.available:
|
||||
return
|
||||
|
||||
try:
|
||||
import os
|
||||
|
||||
from .config import ConfigManager
|
||||
from ..config import ConfigManager
|
||||
|
||||
config_path = os.environ.get("CONFIG_PATH")
|
||||
if config_path:
|
||||
@@ -0,0 +1,3 @@
|
||||
from .postgres import DatabaseManager
|
||||
|
||||
__all__ = ["DatabaseManager"]
|
||||
@@ -9,7 +9,6 @@ class DatabaseManager:
|
||||
self._init_database()
|
||||
|
||||
def _init_database(self):
|
||||
"""Initialize database connection and create schema if needed"""
|
||||
try:
|
||||
self.connection = psycopg2.connect(self.db_url)
|
||||
self.connection.autocommit = True
|
||||
@@ -21,10 +20,8 @@ class DatabaseManager:
|
||||
self.available = False
|
||||
|
||||
def _create_schema(self):
|
||||
"""Create tables if they don't exist"""
|
||||
try:
|
||||
with self.connection.cursor() as cursor:
|
||||
# Create table to map S3 keys to remote names
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS artifact_mappings (
|
||||
id SERIAL PRIMARY KEY,
|
||||
@@ -51,7 +48,6 @@ class DatabaseManager:
|
||||
)
|
||||
""")
|
||||
|
||||
# Create indexes separately
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)")
|
||||
@@ -61,7 +57,6 @@ class DatabaseManager:
|
||||
print(f"Error creating schema: {e}")
|
||||
|
||||
def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int):
|
||||
"""Record mapping between S3 key and remote"""
|
||||
if not self.available:
|
||||
return
|
||||
|
||||
@@ -83,7 +78,6 @@ class DatabaseManager:
|
||||
print(f"Error recording artifact mapping: {e}")
|
||||
|
||||
def get_storage_by_remote(self) -> dict[str, int]:
|
||||
"""Get storage size breakdown by remote from database"""
|
||||
if not self.available:
|
||||
return {}
|
||||
|
||||
@@ -101,7 +95,6 @@ class DatabaseManager:
|
||||
return {}
|
||||
|
||||
def get_remote_for_s3_key(self, s3_key: str) -> str | None:
|
||||
"""Get remote name for given S3 key"""
|
||||
if not self.available:
|
||||
return None
|
||||
|
||||
@@ -126,7 +119,6 @@ class DatabaseManager:
|
||||
sha256_sum: str,
|
||||
content_type: str = None,
|
||||
):
|
||||
"""Add a file to local repository"""
|
||||
if not self.available:
|
||||
return False
|
||||
|
||||
@@ -153,7 +145,6 @@ class DatabaseManager:
|
||||
return False
|
||||
|
||||
def get_local_file_metadata(self, repository_name: str, file_path: str):
|
||||
"""Get metadata for a local file"""
|
||||
if not self.available:
|
||||
return None
|
||||
|
||||
@@ -185,7 +176,6 @@ class DatabaseManager:
|
||||
return None
|
||||
|
||||
def list_local_files(self, repository_name: str, prefix: str = ""):
|
||||
"""List files in local repository with optional path prefix"""
|
||||
if not self.available:
|
||||
return []
|
||||
|
||||
@@ -229,7 +219,6 @@ class DatabaseManager:
|
||||
return []
|
||||
|
||||
def delete_local_file(self, repository_name: str, file_path: str):
|
||||
"""Delete a file from local repository"""
|
||||
if not self.available:
|
||||
return False
|
||||
|
||||
@@ -251,7 +240,6 @@ class DatabaseManager:
|
||||
return None
|
||||
|
||||
def file_exists(self, repository_name: str, file_path: str):
|
||||
"""Check if file exists in local repository"""
|
||||
if not self.available:
|
||||
return False
|
||||
|
||||
+16
-49
@@ -25,6 +25,10 @@ from .config import ConfigManager
|
||||
from .database import DatabaseManager
|
||||
from .docker_auth import get_docker_token_for_response
|
||||
from .metrics import MetricsManager
|
||||
from .remote import helm as _helm
|
||||
from .remote import npm as _npm
|
||||
from .remote import python as _pypi
|
||||
from .remote.base import get_content_type as _get_content_type
|
||||
from .storage import S3Storage
|
||||
|
||||
|
||||
@@ -163,9 +167,8 @@ async def construct_remote_url(remote_name: str, path: str) -> str:
|
||||
if remote_config.get("package") == "docker":
|
||||
return f"{base_url}/v2/{path}"
|
||||
|
||||
# PyPI splits index and files across two hosts; redirect simple/ requests to pypi.org
|
||||
if remote_config.get("package") == "pypi" and base_url.rstrip("/") == "https://files.pythonhosted.org" and "simple/" in path:
|
||||
return f"https://pypi.org/{path}"
|
||||
if remote_config.get("package") == "pypi":
|
||||
return _pypi.construct_url(base_url, path)
|
||||
|
||||
return f"{base_url}/{path}"
|
||||
|
||||
@@ -337,24 +340,6 @@ async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) -
|
||||
return False
|
||||
|
||||
|
||||
def _get_content_type(filename: str) -> str:
|
||||
if filename.endswith((".tar.gz", ".tgz")):
|
||||
return "application/gzip"
|
||||
if filename.endswith(".zip") or filename.endswith(".whl"):
|
||||
return "application/zip"
|
||||
if filename.endswith(".exe"):
|
||||
return "application/x-msdownload"
|
||||
if filename.endswith(".rpm"):
|
||||
return "application/x-rpm"
|
||||
if filename.endswith(".xml"):
|
||||
return "application/xml"
|
||||
if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
||||
return "application/gzip"
|
||||
if filename.endswith((".yaml", ".yml")):
|
||||
return "text/yaml"
|
||||
return "application/octet-stream"
|
||||
|
||||
|
||||
def _resolve_content(
|
||||
data: bytes,
|
||||
path: str,
|
||||
@@ -364,34 +349,16 @@ def _resolve_content(
|
||||
remote_name: str = "",
|
||||
) -> tuple[bytes, str]:
|
||||
"""Return (possibly-rewritten data, content_type) for a cached artifact."""
|
||||
if remote_config.get("package") == "pypi":
|
||||
immutable = remote_config.get("immutable_patterns", [])
|
||||
if not any(re.search(p, path) for p in immutable):
|
||||
proxy_base = str(request.base_url).rstrip("/")
|
||||
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||
data = data.replace(
|
||||
base_url.encode(),
|
||||
f"{proxy_base}/api/v1/remote/{remote_name}".encode(),
|
||||
)
|
||||
return data, "text/html; charset=utf-8"
|
||||
if remote_config.get("package") == "npm":
|
||||
immutable = remote_config.get("immutable_patterns", [])
|
||||
if not any(re.search(p, path) for p in immutable):
|
||||
proxy_base = str(request.base_url).rstrip("/")
|
||||
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||
data = data.replace(
|
||||
base_url.encode(),
|
||||
f"{proxy_base}/api/v1/remote/{remote_name}".encode(),
|
||||
)
|
||||
return data, "application/json"
|
||||
if remote_config.get("package") == "helm" and filename == "index.yaml":
|
||||
proxy_base = str(request.base_url).rstrip("/")
|
||||
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||
data = data.replace(
|
||||
base_url.encode(),
|
||||
f"{proxy_base}/api/v1/remote/{remote_name}".encode(),
|
||||
)
|
||||
return data, "text/yaml"
|
||||
package = remote_config.get("package")
|
||||
proxy_base = str(request.base_url).rstrip("/")
|
||||
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||
|
||||
if package == "pypi":
|
||||
return _pypi.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name)
|
||||
if package == "npm":
|
||||
return _npm.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name)
|
||||
if package == "helm":
|
||||
return _helm.resolve_content(data, path, filename, base_url, proxy_base, remote_name)
|
||||
return data, _get_content_type(filename)
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
from . import generic, helm, npm, python, rpm
|
||||
from .base import get_content_type
|
||||
|
||||
__all__ = ["generic", "helm", "npm", "python", "rpm", "get_content_type"]
|
||||
@@ -0,0 +1,16 @@
|
||||
def get_content_type(filename: str) -> str:
|
||||
if filename.endswith((".tar.gz", ".tgz")):
|
||||
return "application/gzip"
|
||||
if filename.endswith(".zip") or filename.endswith(".whl"):
|
||||
return "application/zip"
|
||||
if filename.endswith(".exe"):
|
||||
return "application/x-msdownload"
|
||||
if filename.endswith(".rpm"):
|
||||
return "application/x-rpm"
|
||||
if filename.endswith(".xml"):
|
||||
return "application/xml"
|
||||
if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
||||
return "application/gzip"
|
||||
if filename.endswith((".yaml", ".yml")):
|
||||
return "text/yaml"
|
||||
return "application/octet-stream"
|
||||
@@ -0,0 +1,3 @@
|
||||
from .base import get_content_type
|
||||
|
||||
__all__ = ["get_content_type"]
|
||||
@@ -0,0 +1,18 @@
|
||||
from .base import get_content_type
|
||||
|
||||
|
||||
def resolve_content(
|
||||
data: bytes,
|
||||
path: str,
|
||||
filename: str,
|
||||
base_url: str,
|
||||
proxy_url: str,
|
||||
remote_name: str,
|
||||
) -> tuple[bytes, str]:
|
||||
if filename == "index.yaml":
|
||||
data = data.replace(
|
||||
base_url.encode(),
|
||||
f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
|
||||
)
|
||||
return data, "text/yaml"
|
||||
return data, get_content_type(filename)
|
||||
@@ -0,0 +1,21 @@
|
||||
import re
|
||||
|
||||
from .base import get_content_type
|
||||
|
||||
|
||||
def resolve_content(
|
||||
data: bytes,
|
||||
path: str,
|
||||
filename: str,
|
||||
immutable_patterns: list[str],
|
||||
base_url: str,
|
||||
proxy_url: str,
|
||||
remote_name: str,
|
||||
) -> tuple[bytes, str]:
|
||||
if not any(re.search(p, path) for p in immutable_patterns):
|
||||
data = data.replace(
|
||||
base_url.encode(),
|
||||
f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
|
||||
)
|
||||
return data, "application/json"
|
||||
return data, get_content_type(filename)
|
||||
@@ -0,0 +1,32 @@
|
||||
import re
|
||||
|
||||
from .base import get_content_type
|
||||
|
||||
|
||||
def construct_url(base_url: str, path: str) -> str:
|
||||
"""Build the upstream URL for a PyPI request.
|
||||
|
||||
PyPI splits simple/ index pages (pypi.org) from file downloads
|
||||
(files.pythonhosted.org), so simple/ requests are redirected to pypi.org.
|
||||
"""
|
||||
if base_url.rstrip("/") == "https://files.pythonhosted.org" and "simple/" in path:
|
||||
return f"https://pypi.org/{path}"
|
||||
return f"{base_url}/{path}"
|
||||
|
||||
|
||||
def resolve_content(
|
||||
data: bytes,
|
||||
path: str,
|
||||
filename: str,
|
||||
immutable_patterns: list[str],
|
||||
base_url: str,
|
||||
proxy_url: str,
|
||||
remote_name: str,
|
||||
) -> tuple[bytes, str]:
|
||||
if not any(re.search(p, path) for p in immutable_patterns):
|
||||
data = data.replace(
|
||||
base_url.encode(),
|
||||
f"{proxy_url}/api/v1/remote/{remote_name}".encode(),
|
||||
)
|
||||
return data, "text/html; charset=utf-8"
|
||||
return data, get_content_type(filename)
|
||||
@@ -0,0 +1,3 @@
|
||||
from .base import get_content_type
|
||||
|
||||
__all__ = ["get_content_type"]
|
||||
Reference in New Issue
Block a user