refactor: split cache, database, and remote logic into submodules
cache/redis.py, database/postgres.py, and remote/{base,generic,helm,npm,python,rpm}.py
replace the flat modules. All public symbols are re-exported from their package
__init__.py for backwards compatibility. No functional changes; all 187 tests pass.
Closes #19
This commit is contained in:
@@ -27,6 +27,31 @@ client → /api/v1/remote/{remote}/{path}
|
|||||||
|
|
||||||
Docker Registry traffic uses the `/v2/{remote}/{path}` endpoint implementing the Docker Registry HTTP API v2.
|
Docker Registry traffic uses the `/v2/{remote}/{path}` endpoint implementing the Docker Registry HTTP API v2.
|
||||||
|
|
||||||
|
### Code layout
|
||||||
|
|
||||||
|
```
|
||||||
|
src/artifactapi/
|
||||||
|
├── main.py — FastAPI app, route handlers
|
||||||
|
├── config.py — ConfigManager (loads remotes.yaml)
|
||||||
|
├── storage.py — S3Storage (MinIO/S3 abstraction)
|
||||||
|
├── docker_auth.py — Docker Bearer token fetching
|
||||||
|
├── metrics.py — Prometheus + Redis metrics
|
||||||
|
├── cache/
|
||||||
|
│ ├── __init__.py — re-exports RedisCache
|
||||||
|
│ └── redis.py — RedisCache (TTL keys, ETag metadata)
|
||||||
|
├── database/
|
||||||
|
│ ├── __init__.py — re-exports DatabaseManager
|
||||||
|
│ └── postgres.py — DatabaseManager (artifact + local-file tables)
|
||||||
|
└── remote/
|
||||||
|
├── __init__.py — re-exports the remote submodules and get_content_type
|
||||||
|
├── base.py — content-type detection
|
||||||
|
├── generic.py — generic HTTP remotes
|
||||||
|
├── helm.py — Helm index.yaml URL rewriting
|
||||||
|
├── npm.py — npm metadata URL rewriting
|
||||||
|
├── python.py — PyPI URL construction + HTML rewriting
|
||||||
|
└── rpm.py — RPM remotes
|
||||||
|
```
|
||||||
|
|
||||||
## API Endpoints
|
## API Endpoints
|
||||||
|
|
||||||
| Method | Path | Description |
|
| Method | Path | Description |
|
||||||
|
|||||||
Vendored
+3
@@ -0,0 +1,3 @@
|
|||||||
|
from .redis import RedisCache
|
||||||
|
|
||||||
|
__all__ = ["RedisCache"]
|
||||||
+1
-10
@@ -11,7 +11,6 @@ class RedisCache:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
self.client = redis.from_url(self.redis_url, decode_responses=True)
|
self.client = redis.from_url(self.redis_url, decode_responses=True)
|
||||||
# Test connection
|
|
||||||
self.client.ping()
|
self.client.ping()
|
||||||
self.available = True
|
self.available = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -20,7 +19,6 @@ class RedisCache:
|
|||||||
self.available = False
|
self.available = False
|
||||||
|
|
||||||
def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
|
def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool:
|
||||||
"""Return True if file_path matches any of the mutable patterns."""
|
|
||||||
if patterns is None:
|
if patterns is None:
|
||||||
patterns = []
|
patterns = []
|
||||||
return any(re.search(p, file_path) for p in patterns)
|
return any(re.search(p, file_path) for p in patterns)
|
||||||
@@ -32,10 +30,8 @@ class RedisCache:
|
|||||||
return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
||||||
|
|
||||||
def is_index_valid(self, remote_name: str, path: str) -> bool:
|
def is_index_valid(self, remote_name: str, path: str) -> bool:
|
||||||
"""Check if mutable file is still within its TTL window."""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
key = self.get_index_cache_key(remote_name, path)
|
key = self.get_index_cache_key(remote_name, path)
|
||||||
return self.client.exists(key) > 0
|
return self.client.exists(key) > 0
|
||||||
@@ -43,10 +39,8 @@ class RedisCache:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
|
def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None:
|
||||||
"""Set or refresh the TTL key for a mutable file."""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
key = self.get_index_cache_key(remote_name, path)
|
key = self.get_index_cache_key(remote_name, path)
|
||||||
self.client.setex(key, ttl, str(int(time.time())))
|
self.client.setex(key, ttl, str(int(time.time())))
|
||||||
@@ -54,7 +48,6 @@ class RedisCache:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None:
|
def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None:
|
||||||
"""Persist ETag and Last-Modified for future conditional requests."""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return
|
return
|
||||||
data = {}
|
data = {}
|
||||||
@@ -70,7 +63,6 @@ class RedisCache:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def get_mutable_meta(self, remote_name: str, path: str) -> dict:
|
def get_mutable_meta(self, remote_name: str, path: str) -> dict:
|
||||||
"""Return stored ETag/Last-Modified for a mutable file, or {}."""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
@@ -87,14 +79,13 @@ class RedisCache:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
|
def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None:
|
||||||
"""Remove an expired mutable file from S3 and clear its Redis meta."""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from .config import ConfigManager
|
from ..config import ConfigManager
|
||||||
|
|
||||||
config_path = os.environ.get("CONFIG_PATH")
|
config_path = os.environ.get("CONFIG_PATH")
|
||||||
if config_path:
|
if config_path:
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .postgres import DatabaseManager
|
||||||
|
|
||||||
|
__all__ = ["DatabaseManager"]
|
||||||
@@ -9,7 +9,6 @@ class DatabaseManager:
|
|||||||
self._init_database()
|
self._init_database()
|
||||||
|
|
||||||
def _init_database(self):
|
def _init_database(self):
|
||||||
"""Initialize database connection and create schema if needed"""
|
|
||||||
try:
|
try:
|
||||||
self.connection = psycopg2.connect(self.db_url)
|
self.connection = psycopg2.connect(self.db_url)
|
||||||
self.connection.autocommit = True
|
self.connection.autocommit = True
|
||||||
@@ -21,10 +20,8 @@ class DatabaseManager:
|
|||||||
self.available = False
|
self.available = False
|
||||||
|
|
||||||
def _create_schema(self):
|
def _create_schema(self):
|
||||||
"""Create tables if they don't exist"""
|
|
||||||
try:
|
try:
|
||||||
with self.connection.cursor() as cursor:
|
with self.connection.cursor() as cursor:
|
||||||
# Create table to map S3 keys to remote names
|
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS artifact_mappings (
|
CREATE TABLE IF NOT EXISTS artifact_mappings (
|
||||||
id SERIAL PRIMARY KEY,
|
id SERIAL PRIMARY KEY,
|
||||||
@@ -51,7 +48,6 @@ class DatabaseManager:
|
|||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# Create indexes separately
|
|
||||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)")
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)")
|
||||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)")
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)")
|
||||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)")
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)")
|
||||||
@@ -61,7 +57,6 @@ class DatabaseManager:
|
|||||||
print(f"Error creating schema: {e}")
|
print(f"Error creating schema: {e}")
|
||||||
|
|
||||||
def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int):
|
def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int):
|
||||||
"""Record mapping between S3 key and remote"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -83,7 +78,6 @@ class DatabaseManager:
|
|||||||
print(f"Error recording artifact mapping: {e}")
|
print(f"Error recording artifact mapping: {e}")
|
||||||
|
|
||||||
def get_storage_by_remote(self) -> dict[str, int]:
|
def get_storage_by_remote(self) -> dict[str, int]:
|
||||||
"""Get storage size breakdown by remote from database"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@@ -101,7 +95,6 @@ class DatabaseManager:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
def get_remote_for_s3_key(self, s3_key: str) -> str | None:
|
def get_remote_for_s3_key(self, s3_key: str) -> str | None:
|
||||||
"""Get remote name for given S3 key"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -126,7 +119,6 @@ class DatabaseManager:
|
|||||||
sha256_sum: str,
|
sha256_sum: str,
|
||||||
content_type: str = None,
|
content_type: str = None,
|
||||||
):
|
):
|
||||||
"""Add a file to local repository"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -153,7 +145,6 @@ class DatabaseManager:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def get_local_file_metadata(self, repository_name: str, file_path: str):
|
def get_local_file_metadata(self, repository_name: str, file_path: str):
|
||||||
"""Get metadata for a local file"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -185,7 +176,6 @@ class DatabaseManager:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def list_local_files(self, repository_name: str, prefix: str = ""):
|
def list_local_files(self, repository_name: str, prefix: str = ""):
|
||||||
"""List files in local repository with optional path prefix"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@@ -229,7 +219,6 @@ class DatabaseManager:
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
def delete_local_file(self, repository_name: str, file_path: str):
|
def delete_local_file(self, repository_name: str, file_path: str):
|
||||||
"""Delete a file from local repository"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -251,7 +240,6 @@ class DatabaseManager:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def file_exists(self, repository_name: str, file_path: str):
|
def file_exists(self, repository_name: str, file_path: str):
|
||||||
"""Check if file exists in local repository"""
|
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
+16
-49
@@ -25,6 +25,10 @@ from .config import ConfigManager
|
|||||||
from .database import DatabaseManager
|
from .database import DatabaseManager
|
||||||
from .docker_auth import get_docker_token_for_response
|
from .docker_auth import get_docker_token_for_response
|
||||||
from .metrics import MetricsManager
|
from .metrics import MetricsManager
|
||||||
|
from .remote import helm as _helm
|
||||||
|
from .remote import npm as _npm
|
||||||
|
from .remote import python as _pypi
|
||||||
|
from .remote.base import get_content_type as _get_content_type
|
||||||
from .storage import S3Storage
|
from .storage import S3Storage
|
||||||
|
|
||||||
|
|
||||||
@@ -163,9 +167,8 @@ async def construct_remote_url(remote_name: str, path: str) -> str:
|
|||||||
if remote_config.get("package") == "docker":
|
if remote_config.get("package") == "docker":
|
||||||
return f"{base_url}/v2/{path}"
|
return f"{base_url}/v2/{path}"
|
||||||
|
|
||||||
# PyPI splits index and files across two hosts; redirect simple/ requests to pypi.org
|
if remote_config.get("package") == "pypi":
|
||||||
if remote_config.get("package") == "pypi" and base_url.rstrip("/") == "https://files.pythonhosted.org" and "simple/" in path:
|
return _pypi.construct_url(base_url, path)
|
||||||
return f"https://pypi.org/{path}"
|
|
||||||
|
|
||||||
return f"{base_url}/{path}"
|
return f"{base_url}/{path}"
|
||||||
|
|
||||||
@@ -337,24 +340,6 @@ async def handle_expired_mutable(remote_name: str, path: str, remote_url: str) -
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _get_content_type(filename: str) -> str:
|
|
||||||
if filename.endswith((".tar.gz", ".tgz")):
|
|
||||||
return "application/gzip"
|
|
||||||
if filename.endswith(".zip") or filename.endswith(".whl"):
|
|
||||||
return "application/zip"
|
|
||||||
if filename.endswith(".exe"):
|
|
||||||
return "application/x-msdownload"
|
|
||||||
if filename.endswith(".rpm"):
|
|
||||||
return "application/x-rpm"
|
|
||||||
if filename.endswith(".xml"):
|
|
||||||
return "application/xml"
|
|
||||||
if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")):
|
|
||||||
return "application/gzip"
|
|
||||||
if filename.endswith((".yaml", ".yml")):
|
|
||||||
return "text/yaml"
|
|
||||||
return "application/octet-stream"
|
|
||||||
|
|
||||||
|
|
||||||
def _resolve_content(
|
def _resolve_content(
|
||||||
data: bytes,
|
data: bytes,
|
||||||
path: str,
|
path: str,
|
||||||
@@ -364,34 +349,16 @@ def _resolve_content(
|
|||||||
remote_name: str = "",
|
remote_name: str = "",
|
||||||
) -> tuple[bytes, str]:
|
) -> tuple[bytes, str]:
|
||||||
"""Return (possibly-rewritten data, content_type) for a cached artifact."""
|
"""Return (possibly-rewritten data, content_type) for a cached artifact."""
|
||||||
if remote_config.get("package") == "pypi":
|
package = remote_config.get("package")
|
||||||
immutable = remote_config.get("immutable_patterns", [])
|
proxy_base = str(request.base_url).rstrip("/")
|
||||||
if not any(re.search(p, path) for p in immutable):
|
base_url = remote_config.get("base_url", "").rstrip("/")
|
||||||
proxy_base = str(request.base_url).rstrip("/")
|
|
||||||
base_url = remote_config.get("base_url", "").rstrip("/")
|
if package == "pypi":
|
||||||
data = data.replace(
|
return _pypi.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name)
|
||||||
base_url.encode(),
|
if package == "npm":
|
||||||
f"{proxy_base}/api/v1/remote/{remote_name}".encode(),
|
return _npm.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name)
|
||||||
)
|
if package == "helm":
|
||||||
return data, "text/html; charset=utf-8"
|
return _helm.resolve_content(data, path, filename, base_url, proxy_base, remote_name)
|
||||||
if remote_config.get("package") == "npm":
|
|
||||||
immutable = remote_config.get("immutable_patterns", [])
|
|
||||||
if not any(re.search(p, path) for p in immutable):
|
|
||||||
proxy_base = str(request.base_url).rstrip("/")
|
|
||||||
base_url = remote_config.get("base_url", "").rstrip("/")
|
|
||||||
data = data.replace(
|
|
||||||
base_url.encode(),
|
|
||||||
f"{proxy_base}/api/v1/remote/{remote_name}".encode(),
|
|
||||||
)
|
|
||||||
return data, "application/json"
|
|
||||||
if remote_config.get("package") == "helm" and filename == "index.yaml":
|
|
||||||
proxy_base = str(request.base_url).rstrip("/")
|
|
||||||
base_url = remote_config.get("base_url", "").rstrip("/")
|
|
||||||
data = data.replace(
|
|
||||||
base_url.encode(),
|
|
||||||
f"{proxy_base}/api/v1/remote/{remote_name}".encode(),
|
|
||||||
)
|
|
||||||
return data, "text/yaml"
|
|
||||||
return data, _get_content_type(filename)
|
return data, _get_content_type(filename)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
from . import generic, helm, npm, python, rpm
|
||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
__all__ = ["generic", "helm", "npm", "python", "rpm", "get_content_type"]
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
def get_content_type(filename: str) -> str:
    """Return the Content-Type to serve for *filename*, chosen by its suffix.

    Matching is case-sensitive and looks only at the end of the name.

    Args:
        filename: File name (or path) whose suffix selects the type.

    Returns:
        The MIME type string for the first matching suffix group, or
        ``"application/octet-stream"`` when no suffix is recognised.
    """
    # The suffix groups are mutually exclusive (no name can end with suffixes
    # from two different groups), so the order of the rows does not matter.
    # NOTE(review): ".xml.bz2" and ".xml.xz" are reported as gzip although
    # they are not gzip streams; kept unchanged so served headers stay the
    # same -- confirm whether that is intended.
    suffix_types = (
        ((".tar.gz", ".tgz", ".xml.gz", ".xml.bz2", ".xml.xz"), "application/gzip"),
        ((".zip", ".whl"), "application/zip"),
        ((".exe",), "application/x-msdownload"),
        ((".rpm",), "application/x-rpm"),
        ((".xml",), "application/xml"),
        ((".yaml", ".yml"), "text/yaml"),
    )
    for suffixes, content_type in suffix_types:
        if filename.endswith(suffixes):
            return content_type
    return "application/octet-stream"
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
__all__ = ["get_content_type"]
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Return (possibly rewritten data, content type) for a Helm artifact.

    For ``index.yaml`` every occurrence of the upstream ``base_url`` in the
    payload is replaced with this proxy's URL for the remote, and the result
    is served as ``text/yaml``. Any other file is returned untouched with a
    content type derived from its name.

    Args:
        data: Raw bytes of the cached artifact.
        path: Request path. Not used by this function.
        filename: Final name of the artifact; only ``"index.yaml"`` is rewritten.
        base_url: Upstream base URL to search for in the index.
        proxy_url: Base URL of this proxy, without a trailing slash.
        remote_name: Remote name used to build the replacement URL.

    Returns:
        A ``(data, content_type)`` tuple.
    """
    if filename != "index.yaml":
        return data, get_content_type(filename)

    # bytes.replace() with an empty needle inserts the replacement between
    # every byte, which would corrupt the index; skip rewriting when no
    # upstream base URL is configured.
    if base_url:
        proxied_prefix = f"{proxy_url}/api/v1/remote/{remote_name}"
        data = data.replace(base_url.encode(), proxied_prefix.encode())
    return data, "text/yaml"
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    immutable_patterns: list[str],
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Return (possibly rewritten data, content type) for an npm artifact.

    A path matching none of ``immutable_patterns`` has every occurrence of
    the upstream ``base_url`` replaced with this proxy's URL for the remote
    and is served as ``application/json``. A path matching an immutable
    pattern is returned untouched with a content type derived from
    ``filename``.

    Args:
        data: Raw bytes of the cached artifact.
        path: Request path, searched against ``immutable_patterns``.
        filename: Final name of the artifact, used for the fallback type.
        immutable_patterns: Regular expressions applied with ``re.search``;
            ``None`` is treated as an empty list.
        base_url: Upstream base URL to search for in the payload.
        proxy_url: Base URL of this proxy, without a trailing slash.
        remote_name: Remote name used to build the replacement URL.

    Returns:
        A ``(data, content_type)`` tuple.
    """
    if any(re.search(pattern, path) for pattern in (immutable_patterns or ())):
        return data, get_content_type(filename)

    # bytes.replace() with an empty needle inserts the replacement between
    # every byte, which would corrupt the document; skip rewriting when no
    # upstream base URL is configured.
    if base_url:
        proxied_prefix = f"{proxy_url}/api/v1/remote/{remote_name}"
        data = data.replace(base_url.encode(), proxied_prefix.encode())
    return data, "application/json"
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
|
||||||
|
def construct_url(base_url: str, path: str) -> str:
    """Build the upstream URL for a PyPI request.

    PyPI splits simple/ index pages (pypi.org) from file downloads
    (files.pythonhosted.org), so simple/ requests are redirected to pypi.org.

    Args:
        base_url: Configured upstream base URL; trailing slashes are ignored.
        path: Request path relative to the upstream.

    Returns:
        ``https://pypi.org/{path}`` when the base is files.pythonhosted.org
        and ``path`` contains ``"simple/"``; otherwise ``{base_url}/{path}``.
    """
    # Normalize once so the host comparison and the fallback URL agree; the
    # fallback previously produced "host//path" for a base ending in "/".
    upstream = base_url.rstrip("/")
    if upstream == "https://files.pythonhosted.org" and "simple/" in path:
        return f"https://pypi.org/{path}"
    return f"{upstream}/{path}"
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_content(
    data: bytes,
    path: str,
    filename: str,
    immutable_patterns: list[str],
    base_url: str,
    proxy_url: str,
    remote_name: str,
) -> tuple[bytes, str]:
    """Return (possibly rewritten data, content type) for a PyPI artifact.

    A path matching none of ``immutable_patterns`` has every occurrence of
    the upstream ``base_url`` replaced with this proxy's URL for the remote
    and is served as ``text/html; charset=utf-8``. A path matching an
    immutable pattern is returned untouched with a content type derived from
    ``filename``.

    Args:
        data: Raw bytes of the cached artifact.
        path: Request path, searched against ``immutable_patterns``.
        filename: Final name of the artifact, used for the fallback type.
        immutable_patterns: Regular expressions applied with ``re.search``;
            ``None`` is treated as an empty list.
        base_url: Upstream base URL to search for in the page.
        proxy_url: Base URL of this proxy, without a trailing slash.
        remote_name: Remote name used to build the replacement URL.

    Returns:
        A ``(data, content_type)`` tuple.
    """
    if any(re.search(pattern, path) for pattern in (immutable_patterns or ())):
        return data, get_content_type(filename)

    # bytes.replace() with an empty needle inserts the replacement between
    # every byte, which would corrupt the page; skip rewriting when no
    # upstream base URL is configured.
    if base_url:
        proxied_prefix = f"{proxy_url}/api/v1/remote/{remote_name}"
        data = data.replace(base_url.encode(), proxied_prefix.encode())
    return data, "text/html; charset=utf-8"
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .base import get_content_type
|
||||||
|
|
||||||
|
__all__ = ["get_content_type"]
|
||||||
Reference in New Issue
Block a user