diff --git a/.gitignore b/.gitignore index 0579fc4..29ba752 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,7 @@ uv.lock # Docker volumes minio_data/ + +# Local configuration overrides +docker-compose.yml +ca-bundle.pem diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index c0df6f8..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,86 +0,0 @@ -version: '3.8' - -services: - artifactapi: - build: - context: . - dockerfile: Dockerfile - no_cache: true - ports: - - "8000:8000" - environment: - - CONFIG_PATH=/app/remotes.yaml - - DBHOST=postgres - - DBPORT=5432 - - DBUSER=artifacts - - DBPASS=artifacts123 - - DBNAME=artifacts - - REDIS_URL=redis://redis:6379 - - MINIO_ENDPOINT=minio:9000 - - MINIO_ACCESS_KEY=minioadmin - - MINIO_SECRET_KEY=minioadmin - - MINIO_BUCKET=artifacts - - MINIO_SECURE=false - depends_on: - postgres: - condition: service_healthy - redis: - condition: service_healthy - minio: - condition: service_healthy - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/health"] - interval: 30s - timeout: 10s - retries: 3 - - minio: - image: minio/minio:latest - ports: - - "9000:9000" - - "9001:9001" - environment: - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin - command: server /data --console-address ":9001" - volumes: - - minio_data:/data - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - - redis: - image: redis:7-alpine - ports: - - "6379:6379" - volumes: - - redis_data:/data - command: redis-server --save 20 1 - healthcheck: - test: ["CMD", "redis-cli", "ping"] - interval: 30s - timeout: 10s - retries: 3 - - postgres: - image: postgres:15-alpine - ports: - - "5432:5432" - environment: - POSTGRES_DB: artifacts - POSTGRES_USER: artifacts - POSTGRES_PASSWORD: artifacts123 - volumes: - - postgres_data:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U artifacts -d artifacts"] - 
interval: 30s - timeout: 10s - retries: 3 - -volumes: - minio_data: - redis_data: - postgres_data: diff --git a/pyproject.toml b/pyproject.toml index a011b74..0fb319f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "artifactapi" -version = "2.0.2" +version = "2.0.4" description = "Generic artifact caching system with support for various package managers" dependencies = [ @@ -40,4 +40,11 @@ dev = [ "isort>=5.12.0", "mypy>=1.6.0", "ruff>=0.1.0", + "bump-my-version>=1.2.0", ] + +[tool.bumpversion] +current_version = "2.0.4" +commit = true +tag = true +message = "Bump version: {current_version} → {new_version}" diff --git a/src/artifactapi/cache.py b/src/artifactapi/cache.py index 7267bbd..42090dc 100644 --- a/src/artifactapi/cache.py +++ b/src/artifactapi/cache.py @@ -76,10 +76,8 @@ class RedisCache: if remote_config: base_url = remote_config.get("base_url") if base_url: - # Construct URL the same way as construct_remote_url - remote_url = f"{base_url.rstrip('/')}/{path}" - # Use URL-based key (same as cache_single_artifact) - s3_key = storage.get_object_key(remote_url) + # Use hierarchical path-based key (same as cache_single_artifact) + s3_key = storage.get_object_key(remote_name, path) if storage.exists(s3_key): storage.client.delete_object(Bucket=storage.bucket, Key=s3_key) except Exception: diff --git a/src/artifactapi/main.py b/src/artifactapi/main.py index 6184fe8..cf593b1 100644 --- a/src/artifactapi/main.py +++ b/src/artifactapi/main.py @@ -9,6 +9,13 @@ from fastapi.responses import PlainTextResponse, JSONResponse from pydantic import BaseModel from prometheus_client import generate_latest, CONTENT_TYPE_LATEST +try: + from importlib.metadata import version + __version__ = version("artifactapi") +except ImportError: + # Fallback for development when package isn't installed + __version__ = "dev" + from .config import ConfigManager from .database import DatabaseManager from .storage import S3Storage @@ -28,7 +35,7 @@ 
logging.basicConfig( ) logger = logging.getLogger(__name__) -app = FastAPI(title="Artifact Storage API", version="2.0.2") +app = FastAPI(title="Artifact Storage API", version=__version__) # Initialize components using config config_path = os.environ.get("CONFIG_PATH") @@ -181,11 +188,8 @@ async def check_artifact_patterns( async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict: - # Check if using URL-based key or path-based key - if url.startswith("http"): - key = storage.get_object_key(url) - else: - key = storage.get_object_key_from_path(remote_name, path) + # Use hierarchical path-based key + key = storage.get_object_key(remote_name, path) if storage.exists(key): logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})") @@ -267,15 +271,10 @@ async def get_artifact(remote_name: str, path: str): # Construct the remote URL remote_url = await construct_remote_url(remote_name, path) - # Check if artifact is already cached (try both URL and path-based keys) - url_key = storage.get_object_key(remote_url) - path_key = storage.get_object_key_from_path(remote_name, path) - - cached_key = None - if storage.exists(url_key): - cached_key = url_key - elif storage.exists(path_key): - cached_key = path_key + # Check if artifact is already cached + cached_key = storage.get_object_key(remote_name, path) + if not storage.exists(cached_key): + cached_key = None # For index files, check Redis TTL validity filename = os.path.basename(path) @@ -355,7 +354,7 @@ async def get_artifact(remote_name: str, path: str): # Now return the cached artifact try: - cache_key = storage.get_object_key(remote_url) + cache_key = storage.get_object_key(remote_name, path) artifact_data = storage.download_object(cache_key) filename = os.path.basename(path) @@ -377,7 +376,7 @@ async def get_artifact(remote_name: str, path: str): metrics.record_cache_miss(remote_name, len(artifact_data)) # Record artifact mapping in database - cache_key = storage.get_object_key(remote_url) + cache_key 
= storage.get_object_key(remote_name, path) database.record_artifact_mapping( cache_key, remote_name, path, len(artifact_data) ) @@ -625,7 +624,11 @@ async def list_cached_artifacts( cached_artifacts = [] for url in matching_urls: - key = storage.get_object_key(url) + # Extract path from URL for hierarchical key generation + from urllib.parse import urlparse + parsed = urlparse(url) + path = parsed.path + key = storage.get_object_key(remote, path) if storage.exists(key): cached_artifacts.append( {"url": url, "cached_url": storage.get_url(key), "key": key} ) diff --git a/src/artifactapi/storage.py b/src/artifactapi/storage.py index a1df714..3e84792 100644 --- a/src/artifactapi/storage.py +++ b/src/artifactapi/storage.py @@ -1,6 +1,5 @@ import os import hashlib -from urllib.parse import urlparse import boto3 from botocore.config import Config from botocore.exceptions import ClientError @@ -55,17 +54,19 @@ class S3Storage: except ClientError: self.client.create_bucket(Bucket=self.bucket) - def get_object_key(self, url: str) -> str: - url_hash = hashlib.sha256(url.encode()).hexdigest()[:16] - parsed = urlparse(url) - filename = os.path.basename(parsed.path) - return f"{parsed.netloc}/{url_hash}/{filename}" + def get_object_key(self, remote_name: str, path: str) -> str: + # Extract directory path and filename + clean_path = path.lstrip('/') + filename = os.path.basename(clean_path) + directory_path = os.path.dirname(clean_path) - def get_object_key_from_path(self, remote_name: str, path: str) -> str: - # Create a key based on the API path for direct access - path_hash = hashlib.sha256(path.encode()).hexdigest()[:16] - filename = os.path.basename(path) - return f"{remote_name}/{path_hash}/{filename}" + # Hash the directory path to keep keys manageable while preserving remote structure + if directory_path: + path_hash = hashlib.sha256(directory_path.encode()).hexdigest()[:16] + return f"{remote_name}/{path_hash}/{filename}" + else: + # If no directory, just use remote and
filename + return f"{remote_name}/{filename}" def exists(self, key: str) -> bool: try: