feat: add test suite, tox, pre-commit, and ruff formatting
- tests/: 107 unit tests across config, cache, docker_auth, storage, and FastAPI routes; all passing under pytest-asyncio auto mode - tox.ini: runs pytest via uvx --with tox-uv tox (py311) - .pre-commit-config.yaml: ruff lint + ruff-format at v0.15.12 - pyproject.toml: pytest config (asyncio_mode=auto), ruff config (line-length=140), tox/pre-commit added to dev extras - Makefile: test/tox/pre-commit targets via uvx --python 3.11 - Source files reformatted by ruff-format (no logic changes)
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
import hashlib
|
||||
|
||||
import redis
|
||||
|
||||
|
||||
@@ -28,9 +29,7 @@ class RedisCache:
|
||||
"""Generate cache key for index files"""
|
||||
return f"index:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}"
|
||||
|
||||
def is_index_valid(
|
||||
self, remote_name: str, path: str, ttl_override: int = None
|
||||
) -> bool:
|
||||
def is_index_valid(self, remote_name: str, path: str, ttl_override: int = None) -> bool:
|
||||
"""Check if index file is still valid (not expired)"""
|
||||
if not self.available:
|
||||
return False
|
||||
@@ -59,8 +58,10 @@ class RedisCache:
|
||||
|
||||
try:
|
||||
# Construct the URL the same way as in the main flow
|
||||
from .config import ConfigManager
|
||||
import os
|
||||
|
||||
from .config import ConfigManager
|
||||
|
||||
config_path = os.environ.get("CONFIG_PATH")
|
||||
if config_path:
|
||||
config = ConfigManager(config_path)
|
||||
|
||||
+11
-12
@@ -1,8 +1,7 @@
|
||||
import os
|
||||
import json
|
||||
import yaml
|
||||
from typing import Optional
|
||||
import os
|
||||
|
||||
import yaml
|
||||
|
||||
_PACKAGE_INDEX_PATTERNS: dict[str, list[str]] = {
|
||||
"alpine": [
|
||||
@@ -31,10 +30,8 @@ class ConfigManager:
|
||||
|
||||
def _load_config(self) -> dict:
|
||||
try:
|
||||
with open(self.config_file, "r") as f:
|
||||
if self.config_file.endswith(".yaml") or self.config_file.endswith(
|
||||
".yml"
|
||||
):
|
||||
with open(self.config_file) as f:
|
||||
if self.config_file.endswith(".yaml") or self.config_file.endswith(".yml"):
|
||||
return yaml.safe_load(f)
|
||||
else:
|
||||
return json.load(f)
|
||||
@@ -54,7 +51,7 @@ class ConfigManager:
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def get_remote_config(self, remote_name: str) -> Optional[dict]:
|
||||
def get_remote_config(self, remote_name: str) -> dict | None:
|
||||
self._check_reload()
|
||||
return self.config.get("remotes", {}).get(remote_name)
|
||||
|
||||
@@ -111,9 +108,7 @@ class ConfigManager:
|
||||
if not redis_url:
|
||||
raise ValueError("REDIS_URL environment variable is required")
|
||||
|
||||
return {
|
||||
"url": redis_url
|
||||
}
|
||||
return {"url": redis_url}
|
||||
|
||||
def get_database_config(self) -> dict:
|
||||
"""Get database configuration from environment variables"""
|
||||
@@ -124,7 +119,11 @@ class ConfigManager:
|
||||
db_name = os.getenv("DBNAME")
|
||||
|
||||
if not all([db_host, db_port, db_user, db_pass, db_name]):
|
||||
missing = [var for var, val in [("DBHOST", db_host), ("DBPORT", db_port), ("DBUSER", db_user), ("DBPASS", db_pass), ("DBNAME", db_name)] if not val]
|
||||
missing = [
|
||||
var
|
||||
for var, val in [("DBHOST", db_host), ("DBPORT", db_port), ("DBUSER", db_user), ("DBPASS", db_pass), ("DBNAME", db_name)]
|
||||
if not val
|
||||
]
|
||||
raise ValueError(f"All database environment variables are required: {', '.join(missing)}")
|
||||
|
||||
db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}"
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
@@ -54,25 +52,15 @@ class DatabaseManager:
|
||||
""")
|
||||
|
||||
# Create indexes separately
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)"
|
||||
)
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)"
|
||||
)
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)"
|
||||
)
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_local_s3_key ON local_files (s3_key)"
|
||||
)
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_s3_key ON local_files (s3_key)")
|
||||
print("Database schema initialized")
|
||||
except Exception as e:
|
||||
print(f"Error creating schema: {e}")
|
||||
|
||||
def record_artifact_mapping(
|
||||
self, s3_key: str, remote_name: str, file_path: str, size_bytes: int
|
||||
):
|
||||
def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int):
|
||||
"""Record mapping between S3 key and remote"""
|
||||
if not self.available:
|
||||
return
|
||||
@@ -112,7 +100,7 @@ class DatabaseManager:
|
||||
print(f"Error getting storage by remote: {e}")
|
||||
return {}
|
||||
|
||||
def get_remote_for_s3_key(self, s3_key: str) -> Optional[str]:
|
||||
def get_remote_for_s3_key(self, s3_key: str) -> str | None:
|
||||
"""Get remote name for given S3 key"""
|
||||
if not self.available:
|
||||
return None
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import time
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -17,11 +17,11 @@ _WWW_AUTH_RE = re.compile(
|
||||
)
|
||||
|
||||
|
||||
def _cache_key(realm: str, service: str, scope: str, username: Optional[str]) -> str:
|
||||
def _cache_key(realm: str, service: str, scope: str, username: str | None) -> str:
|
||||
return f"{realm}|{service}|{scope}|{username or ''}"
|
||||
|
||||
|
||||
def _get_cached_token(key: str) -> Optional[str]:
|
||||
def _get_cached_token(key: str) -> str | None:
|
||||
entry = _token_cache.get(key)
|
||||
if entry and entry[1] > time.time():
|
||||
return entry[0]
|
||||
@@ -38,9 +38,9 @@ async def fetch_token(
|
||||
realm: str,
|
||||
service: str,
|
||||
scope: str,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
username: str | None = None,
|
||||
password: str | None = None,
|
||||
) -> str | None:
|
||||
"""Fetch a Bearer token from a Docker registry auth server."""
|
||||
key = _cache_key(realm, service, scope, username)
|
||||
cached = _get_cached_token(key)
|
||||
@@ -75,7 +75,7 @@ async def fetch_token(
|
||||
return token
|
||||
|
||||
|
||||
def parse_www_authenticate(header: str) -> Optional[tuple[str, str, str]]:
|
||||
def parse_www_authenticate(header: str) -> tuple[str, str, str] | None:
|
||||
"""Parse WWW-Authenticate: Bearer header. Returns (realm, service, scope) or None."""
|
||||
m = _WWW_AUTH_RE.search(header)
|
||||
if not m:
|
||||
@@ -85,9 +85,9 @@ def parse_www_authenticate(header: str) -> Optional[tuple[str, str, str]]:
|
||||
|
||||
async def get_docker_token_for_response(
|
||||
www_authenticate: str,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
username: str | None = None,
|
||||
password: str | None = None,
|
||||
) -> str | None:
|
||||
"""Given a WWW-Authenticate header value, fetch and return a Bearer token."""
|
||||
parsed = parse_www_authenticate(www_authenticate)
|
||||
if not parsed:
|
||||
|
||||
+44
-104
@@ -1,28 +1,30 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import hashlib
|
||||
import logging
|
||||
from typing import Dict, Any, Optional
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from fastapi import FastAPI, HTTPException, Response, Request, Query, File, UploadFile
|
||||
from fastapi.responses import PlainTextResponse, JSONResponse
|
||||
from fastapi import FastAPI, File, HTTPException, Query, Request, Response, UploadFile
|
||||
from fastapi.responses import JSONResponse, PlainTextResponse
|
||||
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
|
||||
from pydantic import BaseModel
|
||||
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
||||
|
||||
try:
|
||||
from importlib.metadata import version
|
||||
|
||||
__version__ = version("artifactapi")
|
||||
except ImportError:
|
||||
# Fallback for development when package isn't installed
|
||||
__version__ = "dev"
|
||||
|
||||
from .cache import RedisCache
|
||||
from .config import ConfigManager
|
||||
from .database import DatabaseManager
|
||||
from .storage import S3Storage
|
||||
from .cache import RedisCache
|
||||
from .metrics import MetricsManager
|
||||
from .docker_auth import get_docker_token_for_response
|
||||
from .metrics import MetricsManager
|
||||
from .storage import S3Storage
|
||||
|
||||
|
||||
class ArtifactRequest(BaseModel):
|
||||
@@ -31,10 +33,7 @@ class ArtifactRequest(BaseModel):
|
||||
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = FastAPI(title="Artifact Storage API", version=__version__)
|
||||
@@ -75,19 +74,11 @@ def health_check():
|
||||
@app.put("/cache/flush")
|
||||
def flush_cache(
|
||||
remote: str = Query(default=None, description="Specific remote to flush (optional)"),
|
||||
cache_type: str = Query(default="all", description="Type to flush: 'all', 'index', 'files', 'metrics'")
|
||||
cache_type: str = Query(default="all", description="Type to flush: 'all', 'index', 'files', 'metrics'"),
|
||||
):
|
||||
"""Flush cache entries for specified remote or all remotes"""
|
||||
try:
|
||||
result = {
|
||||
"remote": remote,
|
||||
"cache_type": cache_type,
|
||||
"flushed": {
|
||||
"redis_keys": 0,
|
||||
"s3_objects": 0,
|
||||
"operations": []
|
||||
}
|
||||
}
|
||||
result = {"remote": remote, "cache_type": cache_type, "flushed": {"redis_keys": 0, "s3_objects": 0, "operations": []}}
|
||||
|
||||
# Flush Redis entries based on cache_type
|
||||
if cache_type in ["all", "index", "metrics"] and cache.available and cache.client:
|
||||
@@ -124,8 +115,8 @@ def flush_cache(
|
||||
list_params["Prefix"] = f"{remote}/"
|
||||
|
||||
response = storage.client.list_objects_v2(**list_params)
|
||||
if 'Contents' in response:
|
||||
objects_to_delete = [obj['Key'] for obj in response['Contents']]
|
||||
if "Contents" in response:
|
||||
objects_to_delete = [obj["Key"] for obj in response["Contents"]]
|
||||
|
||||
for key in objects_to_delete:
|
||||
try:
|
||||
@@ -156,15 +147,11 @@ def flush_cache(
|
||||
async def construct_remote_url(remote_name: str, path: str) -> str:
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||
)
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
base_url = remote_config.get("base_url")
|
||||
if not base_url:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"No base_url configured for remote '{remote_name}'"
|
||||
)
|
||||
raise HTTPException(status_code=500, detail=f"No base_url configured for remote '{remote_name}'")
|
||||
|
||||
# Handle Docker registry URLs
|
||||
if remote_config.get("package") == "docker":
|
||||
@@ -175,9 +162,7 @@ async def construct_remote_url(remote_name: str, path: str) -> str:
|
||||
return f"{base_url}/{path}"
|
||||
|
||||
|
||||
async def check_artifact_patterns(
|
||||
remote_name: str, repo_path: str, file_path: str, full_path: str
|
||||
) -> bool:
|
||||
async def check_artifact_patterns(remote_name: str, repo_path: str, file_path: str, full_path: str) -> bool:
|
||||
# First check if this is an index file - always allow index files
|
||||
index_patterns = config.get_index_patterns(remote_name)
|
||||
if cache.is_index_file(file_path, index_patterns) or cache.is_index_file(full_path, index_patterns):
|
||||
@@ -267,9 +252,7 @@ async def get_artifact(remote_name: str, path: str):
|
||||
# Check if remote is configured
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||
)
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
# Check if this is a local repository
|
||||
if remote_config.get("type") == "local":
|
||||
@@ -289,9 +272,7 @@ async def get_artifact(remote_name: str, path: str):
|
||||
return Response(
|
||||
content=content,
|
||||
media_type=content_type,
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename={os.path.basename(path)}"
|
||||
},
|
||||
headers={"Content-Disposition": f"attachment; filename={os.path.basename(path)}"},
|
||||
)
|
||||
|
||||
# Extract repository path for pattern checking
|
||||
@@ -306,9 +287,7 @@ async def get_artifact(remote_name: str, path: str):
|
||||
# Check if artifact matches configured patterns
|
||||
if not await check_artifact_patterns(remote_name, repo_path, file_path, path):
|
||||
logger.info(f"PATTERN BLOCKED: {remote_name}/{path} - not matching include patterns")
|
||||
raise HTTPException(
|
||||
status_code=403, detail="Artifact not allowed by configuration patterns"
|
||||
)
|
||||
raise HTTPException(status_code=403, detail="Artifact not allowed by configuration patterns")
|
||||
|
||||
# Construct the remote URL
|
||||
remote_url = await construct_remote_url(remote_name, path)
|
||||
@@ -358,9 +337,7 @@ async def get_artifact(remote_name: str, path: str):
|
||||
metrics.record_cache_hit(remote_name, len(artifact_data))
|
||||
|
||||
# Record artifact mapping in database if not already recorded
|
||||
database.record_artifact_mapping(
|
||||
cached_key, remote_name, path, len(artifact_data)
|
||||
)
|
||||
database.record_artifact_mapping(cached_key, remote_name, path, len(artifact_data))
|
||||
|
||||
return Response(
|
||||
content=artifact_data,
|
||||
@@ -372,9 +349,7 @@ async def get_artifact(remote_name: str, path: str):
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Error retrieving cached artifact: {str(e)}"
|
||||
)
|
||||
raise HTTPException(status_code=500, detail=f"Error retrieving cached artifact: {str(e)}")
|
||||
|
||||
# Artifact not cached, cache it first
|
||||
logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}")
|
||||
@@ -382,9 +357,7 @@ async def get_artifact(remote_name: str, path: str):
|
||||
|
||||
if result["status"] == "error":
|
||||
logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}")
|
||||
raise HTTPException(
|
||||
status_code=502, detail=f"Failed to fetch artifact: {result['error']}"
|
||||
)
|
||||
raise HTTPException(status_code=502, detail=f"Failed to fetch artifact: {result['error']}")
|
||||
|
||||
# Mark index files as cached in Redis if this was a new download
|
||||
if result["status"] == "cached" and is_index:
|
||||
@@ -419,9 +392,7 @@ async def get_artifact(remote_name: str, path: str):
|
||||
|
||||
# Record artifact mapping in database
|
||||
cache_key = storage.get_object_key(remote_name, path)
|
||||
database.record_artifact_mapping(
|
||||
cache_key, remote_name, path, len(artifact_data)
|
||||
)
|
||||
database.record_artifact_mapping(cache_key, remote_name, path, len(artifact_data))
|
||||
|
||||
return Response(
|
||||
content=artifact_data,
|
||||
@@ -533,9 +504,7 @@ async def discover_github_releases(remote: str, include_pattern: str) -> list[st
|
||||
owner, repo = match.groups()
|
||||
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
response = await client.get(
|
||||
f"https://api.github.com/repos/{owner}/{repo}/releases"
|
||||
)
|
||||
response = await client.get(f"https://api.github.com/repos/{owner}/{repo}/releases")
|
||||
|
||||
if response.status_code != 200:
|
||||
raise HTTPException(
|
||||
@@ -564,14 +533,10 @@ async def upload_file(remote_name: str, path: str, file: UploadFile = File(...))
|
||||
# Check if remote is configured and is local
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||
)
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
if remote_config.get("type") != "local":
|
||||
raise HTTPException(
|
||||
status_code=400, detail="Upload only supported for local repositories"
|
||||
)
|
||||
raise HTTPException(status_code=400, detail="Upload only supported for local repositories")
|
||||
|
||||
try:
|
||||
# Read file content
|
||||
@@ -633,9 +598,7 @@ def check_file_exists(remote_name: str, path: str):
|
||||
# Check if remote is configured
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||
)
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
# Handle local repository
|
||||
if remote_config.get("type") == "local":
|
||||
@@ -647,16 +610,10 @@ def check_file_exists(remote_name: str, path: str):
|
||||
return Response(
|
||||
headers={
|
||||
"Content-Length": str(metadata["size_bytes"]),
|
||||
"Content-Type": metadata.get(
|
||||
"content_type", "application/octet-stream"
|
||||
),
|
||||
"Content-Type": metadata.get("content_type", "application/octet-stream"),
|
||||
"X-SHA256": metadata["sha256_sum"],
|
||||
"X-Created-At": metadata["created_at"].isoformat()
|
||||
if metadata["created_at"]
|
||||
else "",
|
||||
"X-Uploaded-At": metadata["uploaded_at"].isoformat()
|
||||
if metadata["uploaded_at"]
|
||||
else "",
|
||||
"X-Created-At": metadata["created_at"].isoformat() if metadata["created_at"] else "",
|
||||
"X-Uploaded-At": metadata["uploaded_at"].isoformat() if metadata["uploaded_at"] else "",
|
||||
}
|
||||
)
|
||||
except HTTPException:
|
||||
@@ -665,9 +622,7 @@ def check_file_exists(remote_name: str, path: str):
|
||||
raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")
|
||||
else:
|
||||
# For remote repositories, just return 405 Method Not Allowed
|
||||
raise HTTPException(
|
||||
status_code=405, detail="HEAD method only supported for local repositories"
|
||||
)
|
||||
raise HTTPException(status_code=405, detail="HEAD method only supported for local repositories")
|
||||
|
||||
|
||||
@app.delete("/api/v1/remote/{remote_name}/{path:path}")
|
||||
@@ -676,14 +631,10 @@ def delete_file(remote_name: str, path: str):
|
||||
# Check if remote is configured and is local
|
||||
remote_config = config.get_remote_config(remote_name)
|
||||
if not remote_config:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Remote '{remote_name}' not configured"
|
||||
)
|
||||
raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured")
|
||||
|
||||
if remote_config.get("type") != "local":
|
||||
raise HTTPException(
|
||||
status_code=400, detail="Delete only supported for local repositories"
|
||||
)
|
||||
raise HTTPException(status_code=400, detail="Delete only supported for local repositories")
|
||||
|
||||
try:
|
||||
# Get S3 key before deleting from database
|
||||
@@ -704,11 +655,9 @@ def delete_file(remote_name: str, path: str):
|
||||
|
||||
|
||||
@app.post("/api/v1/artifacts/cache")
|
||||
async def cache_artifact(request: ArtifactRequest) -> Dict[str, Any]:
|
||||
async def cache_artifact(request: ArtifactRequest) -> dict[str, Any]:
|
||||
try:
|
||||
matching_urls = await discover_artifacts(
|
||||
request.remote, request.include_pattern
|
||||
)
|
||||
matching_urls = await discover_artifacts(request.remote, request.include_pattern)
|
||||
|
||||
if not matching_urls:
|
||||
return {
|
||||
@@ -723,11 +672,7 @@ async def cache_artifact(request: ArtifactRequest) -> Dict[str, Any]:
|
||||
result = await cache_single_artifact(url, "", "")
|
||||
cached_artifacts.append(result)
|
||||
|
||||
cached_count = sum(
|
||||
1
|
||||
for artifact in cached_artifacts
|
||||
if artifact["status"] in ["cached", "already_cached"]
|
||||
)
|
||||
cached_count = sum(1 for artifact in cached_artifacts if artifact["status"] in ["cached", "already_cached"])
|
||||
|
||||
return {
|
||||
"message": f"Processed {len(matching_urls)} artifacts, {cached_count} successfully cached",
|
||||
@@ -740,9 +685,7 @@ async def cache_artifact(request: ArtifactRequest) -> Dict[str, Any]:
|
||||
|
||||
|
||||
@app.get("/api/v1/artifacts/{remote:path}")
|
||||
async def list_cached_artifacts(
|
||||
remote: str, include_pattern: str = ".*"
|
||||
) -> Dict[str, Any]:
|
||||
async def list_cached_artifacts(remote: str, include_pattern: str = ".*") -> dict[str, Any]:
|
||||
try:
|
||||
matching_urls = await discover_artifacts(remote, include_pattern)
|
||||
|
||||
@@ -750,13 +693,12 @@ async def list_cached_artifacts(
|
||||
for url in matching_urls:
|
||||
# Extract path from URL for hierarchical key generation
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed = urlparse(url)
|
||||
path = parsed.path
|
||||
key = storage.get_object_key(remote, path)
|
||||
if storage.exists(key):
|
||||
cached_artifacts.append(
|
||||
{"url": url, "cached_url": storage.get_url(key), "key": key}
|
||||
)
|
||||
cached_artifacts.append({"url": url, "cached_url": storage.get_url(key), "key": key})
|
||||
|
||||
return {
|
||||
"remote": remote,
|
||||
@@ -772,9 +714,7 @@ async def list_cached_artifacts(
|
||||
|
||||
@app.get("/metrics")
|
||||
def get_metrics(
|
||||
json: Optional[bool] = Query(
|
||||
False, description="Return JSON format instead of Prometheus"
|
||||
),
|
||||
json: bool | None = Query(False, description="Return JSON format instead of Prometheus"),
|
||||
):
|
||||
"""Get comprehensive metrics about the artifact storage system"""
|
||||
config._check_reload()
|
||||
|
||||
+18
-51
@@ -1,22 +1,14 @@
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
from typing import Any
|
||||
|
||||
from prometheus_client import Counter, Gauge
|
||||
|
||||
|
||||
# Prometheus metrics
|
||||
request_counter = Counter(
|
||||
"artifact_requests_total", "Total artifact requests", ["remote", "status"]
|
||||
)
|
||||
request_counter = Counter("artifact_requests_total", "Total artifact requests", ["remote", "status"])
|
||||
cache_hit_counter = Counter("artifact_cache_hits_total", "Total cache hits", ["remote"])
|
||||
cache_miss_counter = Counter(
|
||||
"artifact_cache_misses_total", "Total cache misses", ["remote"]
|
||||
)
|
||||
bandwidth_saved_counter = Counter(
|
||||
"artifact_bandwidth_saved_bytes_total", "Total bandwidth saved", ["remote"]
|
||||
)
|
||||
storage_size_gauge = Gauge(
|
||||
"artifact_storage_size_bytes", "Storage size by remote", ["remote"]
|
||||
)
|
||||
cache_miss_counter = Counter("artifact_cache_misses_total", "Total cache misses", ["remote"])
|
||||
bandwidth_saved_counter = Counter("artifact_bandwidth_saved_bytes_total", "Total bandwidth saved", ["remote"])
|
||||
storage_size_gauge = Gauge("artifact_storage_size_bytes", "Storage size by remote", ["remote"])
|
||||
redis_keys_gauge = Gauge("artifact_redis_keys_total", "Total Redis keys")
|
||||
|
||||
|
||||
@@ -44,9 +36,7 @@ class MetricsManager:
|
||||
# Increment per-remote counters
|
||||
self.redis_client.client.incr(f"metrics:cache_hits:{remote_name}")
|
||||
self.redis_client.client.incr(f"metrics:total_requests:{remote_name}")
|
||||
self.redis_client.client.incrby(
|
||||
f"metrics:bandwidth_saved:{remote_name}", size_bytes
|
||||
)
|
||||
self.redis_client.client.incrby(f"metrics:bandwidth_saved:{remote_name}", size_bytes)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -91,7 +81,7 @@ class MetricsManager:
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
def get_s3_size_by_remote(self, storage, config_manager) -> Dict[str, int]:
|
||||
def get_s3_size_by_remote(self, storage, config_manager) -> dict[str, int]:
|
||||
"""Get size of stored data per remote using database mappings"""
|
||||
if self.database_manager and self.database_manager.available:
|
||||
# Get from database if available
|
||||
@@ -146,7 +136,7 @@ class MetricsManager:
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
def get_metrics(self, storage, config_manager) -> Dict[str, Any]:
|
||||
def get_metrics(self, storage, config_manager) -> dict[str, Any]:
|
||||
"""Get comprehensive metrics"""
|
||||
# Update Redis keys gauge
|
||||
redis_key_count = self.get_redis_key_count()
|
||||
@@ -173,54 +163,31 @@ class MetricsManager:
|
||||
if self.redis_client and self.redis_client.available:
|
||||
try:
|
||||
# Get global metrics
|
||||
cache_hits = int(
|
||||
self.redis_client.client.get("metrics:cache_hits") or 0
|
||||
)
|
||||
cache_misses = int(
|
||||
self.redis_client.client.get("metrics:cache_misses") or 0
|
||||
)
|
||||
cache_hits = int(self.redis_client.client.get("metrics:cache_hits") or 0)
|
||||
cache_misses = int(self.redis_client.client.get("metrics:cache_misses") or 0)
|
||||
total_requests = cache_hits + cache_misses
|
||||
bandwidth_saved = int(
|
||||
self.redis_client.client.get("metrics:bandwidth_saved") or 0
|
||||
)
|
||||
bandwidth_saved = int(self.redis_client.client.get("metrics:bandwidth_saved") or 0)
|
||||
|
||||
metrics["requests"]["cache_hits"] = cache_hits
|
||||
metrics["requests"]["cache_misses"] = cache_misses
|
||||
metrics["requests"]["total_requests"] = total_requests
|
||||
metrics["requests"]["cache_hit_ratio"] = (
|
||||
cache_hits / total_requests if total_requests > 0 else 0.0
|
||||
)
|
||||
metrics["requests"]["cache_hit_ratio"] = cache_hits / total_requests if total_requests > 0 else 0.0
|
||||
metrics["bandwidth"]["saved_bytes"] = bandwidth_saved
|
||||
|
||||
# Get per-remote metrics
|
||||
for remote in config_manager.config.get("remotes", {}).keys():
|
||||
remote_cache_hits = int(
|
||||
self.redis_client.client.get(f"metrics:cache_hits:{remote}")
|
||||
or 0
|
||||
)
|
||||
remote_cache_misses = int(
|
||||
self.redis_client.client.get(f"metrics:cache_misses:{remote}")
|
||||
or 0
|
||||
)
|
||||
remote_cache_hits = int(self.redis_client.client.get(f"metrics:cache_hits:{remote}") or 0)
|
||||
remote_cache_misses = int(self.redis_client.client.get(f"metrics:cache_misses:{remote}") or 0)
|
||||
remote_total = remote_cache_hits + remote_cache_misses
|
||||
remote_bandwidth_saved = int(
|
||||
self.redis_client.client.get(
|
||||
f"metrics:bandwidth_saved:{remote}"
|
||||
)
|
||||
or 0
|
||||
)
|
||||
remote_bandwidth_saved = int(self.redis_client.client.get(f"metrics:bandwidth_saved:{remote}") or 0)
|
||||
|
||||
metrics["per_remote"][remote] = {
|
||||
"cache_hits": remote_cache_hits,
|
||||
"cache_misses": remote_cache_misses,
|
||||
"total_requests": remote_total,
|
||||
"cache_hit_ratio": remote_cache_hits / remote_total
|
||||
if remote_total > 0
|
||||
else 0.0,
|
||||
"cache_hit_ratio": remote_cache_hits / remote_total if remote_total > 0 else 0.0,
|
||||
"bandwidth_saved_bytes": remote_bandwidth_saved,
|
||||
"storage_size_bytes": metrics["storage"]["size_by_remote"].get(
|
||||
remote, 0
|
||||
),
|
||||
"storage_size_bytes": metrics["storage"]["size_by_remote"].get(remote, 0),
|
||||
}
|
||||
|
||||
except Exception:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
import hashlib
|
||||
import os
|
||||
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError
|
||||
@@ -21,23 +22,22 @@ class S3Storage:
|
||||
self.bucket = bucket
|
||||
self.secure = secure
|
||||
|
||||
ca_bundle = os.environ.get('REQUESTS_CA_BUNDLE') or os.environ.get('SSL_CERT_FILE')
|
||||
config_kwargs = {
|
||||
"request_checksum_calculation": "when_required",
|
||||
"response_checksum_validation": "when_required"
|
||||
}
|
||||
ca_bundle = os.environ.get("REQUESTS_CA_BUNDLE") or os.environ.get("SSL_CERT_FILE")
|
||||
config_kwargs = {"request_checksum_calculation": "when_required", "response_checksum_validation": "when_required"}
|
||||
client_kwargs = {
|
||||
"endpoint_url": f"http{'s' if self.secure else ''}://{self.endpoint}",
|
||||
"aws_access_key_id": self.access_key,
|
||||
"aws_secret_access_key": self.secret_key,
|
||||
"config": Config(**config_kwargs)
|
||||
"config": Config(**config_kwargs),
|
||||
}
|
||||
|
||||
if ca_bundle and os.path.exists(ca_bundle):
|
||||
client_kwargs["verify"] = ca_bundle
|
||||
print(f"Debug: Using CA bundle: {ca_bundle}")
|
||||
else:
|
||||
print(f"Debug: No CA bundle found. REQUESTS_CA_BUNDLE={os.environ.get('REQUESTS_CA_BUNDLE')}, SSL_CERT_FILE={os.environ.get('SSL_CERT_FILE')}")
|
||||
print(
|
||||
f"Debug: No CA bundle found. REQUESTS_CA_BUNDLE={os.environ.get('REQUESTS_CA_BUNDLE')}, SSL_CERT_FILE={os.environ.get('SSL_CERT_FILE')}"
|
||||
)
|
||||
|
||||
self.client = boto3.client("s3", **client_kwargs)
|
||||
|
||||
@@ -56,7 +56,7 @@ class S3Storage:
|
||||
|
||||
def get_object_key(self, remote_name: str, path: str) -> str:
|
||||
# Extract directory path and filename
|
||||
clean_path = path.lstrip('/')
|
||||
clean_path = path.lstrip("/")
|
||||
filename = os.path.basename(clean_path)
|
||||
directory_path = os.path.dirname(clean_path)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user