diff --git a/pyproject.toml b/pyproject.toml index a011b74..44b185a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,4 +40,18 @@ dev = [ "isort>=5.12.0", "mypy>=1.6.0", "ruff>=0.1.0", + "bumpver>=2025.1131", ] + +[tool.bumpver] +current_version = "2.0.2" +version_pattern = "MAJOR.MINOR.PATCH" +commit_message = "Bump version {old_version} → {new_version}" +commit = true +tag = true +push = false + +[[tool.bumpver.files]] +filename = "pyproject.toml" +search = 'version = "{current_version}"' +replace = 'version = "{new_version}"' diff --git a/src/artifactapi/cache.py b/src/artifactapi/cache.py index 7267bbd..42090dc 100644 --- a/src/artifactapi/cache.py +++ b/src/artifactapi/cache.py @@ -76,10 +76,8 @@ class RedisCache: if remote_config: base_url = remote_config.get("base_url") if base_url: - # Construct URL the same way as construct_remote_url - remote_url = f"{base_url.rstrip('/')}/{path}" - # Use URL-based key (same as cache_single_artifact) - s3_key = storage.get_object_key(remote_url) + # Use hierarchical path-based key (same as cache_single_artifact) + s3_key = storage.get_object_key(remote_name, path) if storage.exists(s3_key): storage.client.delete_object(Bucket=storage.bucket, Key=s3_key) except Exception: diff --git a/src/artifactapi/main.py b/src/artifactapi/main.py index 6184fe8..cf593b1 100644 --- a/src/artifactapi/main.py +++ b/src/artifactapi/main.py @@ -9,6 +9,13 @@ from fastapi.responses import PlainTextResponse, JSONResponse from pydantic import BaseModel from prometheus_client import generate_latest, CONTENT_TYPE_LATEST +try: + from importlib.metadata import version + __version__ = version("artifactapi") +except ImportError: + # Fallback for development when package isn't installed + __version__ = "dev" + from .config import ConfigManager from .database import DatabaseManager from .storage import S3Storage @@ -28,7 +35,7 @@ logging.basicConfig( ) logger = logging.getLogger(__name__) -app = FastAPI(title="Artifact Storage API", 
version="2.0.2") +app = FastAPI(title="Artifact Storage API", version=__version__) # Initialize components using config config_path = os.environ.get("CONFIG_PATH") @@ -181,11 +188,8 @@ async def check_artifact_patterns( async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict: - # Check if using URL-based key or path-based key - if url.startswith("http"): - key = storage.get_object_key(url) - else: - key = storage.get_object_key_from_path(remote_name, path) + # Use hierarchical path-based key + key = storage.get_object_key(remote_name, path) if storage.exists(key): logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})") @@ -267,15 +271,10 @@ async def get_artifact(remote_name: str, path: str): # Construct the remote URL remote_url = await construct_remote_url(remote_name, path) - # Check if artifact is already cached (try both URL and path-based keys) - url_key = storage.get_object_key(remote_url) - path_key = storage.get_object_key_from_path(remote_name, path) - - cached_key = None - if storage.exists(url_key): - cached_key = url_key - elif storage.exists(path_key): - cached_key = path_key + # Check if artifact is already cached + cached_key = storage.get_object_key(remote_name, path) + if not storage.exists(cached_key): + cached_key = None # For index files, check Redis TTL validity filename = os.path.basename(path) @@ -355,7 +354,7 @@ async def get_artifact(remote_name: str, path: str): # Now return the cached artifact try: - cache_key = storage.get_object_key(remote_url) + cache_key = storage.get_object_key(remote_name, path) artifact_data = storage.download_object(cache_key) filename = os.path.basename(path) @@ -377,7 +376,7 @@ async def get_artifact(remote_name: str, path: str): metrics.record_cache_miss(remote_name, len(artifact_data)) # Record artifact mapping in database - cache_key = storage.get_object_key(remote_url) + cache_key = storage.get_object_key(remote_name, path) database.record_artifact_mapping( cache_key, remote_name, 
path, len(artifact_data) ) @@ -625,7 +624,11 @@ async def list_cached_artifacts( cached_artifacts = [] for url in matching_urls: - key = storage.get_object_key(url) + # Extract path from URL for hierarchical key generation + from urllib.parse import urlparse + parsed = urlparse(url) + path = parsed.path + key = storage.get_object_key(remote, path) if storage.exists(key): cached_artifacts.append( {"url": url, "cached_url": storage.get_url(key), "key": key} diff --git a/src/artifactapi/storage.py b/src/artifactapi/storage.py index a1df714..3e84792 100644 --- a/src/artifactapi/storage.py +++ b/src/artifactapi/storage.py @@ -1,6 +1,5 @@ import os import hashlib -from urllib.parse import urlparse import boto3 from botocore.config import Config from botocore.exceptions import ClientError @@ -55,17 +54,19 @@ class S3Storage: except ClientError: self.client.create_bucket(Bucket=self.bucket) - def get_object_key(self, url: str) -> str: - url_hash = hashlib.sha256(url.encode()).hexdigest()[:16] - parsed = urlparse(url) - filename = os.path.basename(parsed.path) - return f"{parsed.netloc}/{url_hash}/{filename}" + def get_object_key(self, remote_name: str, path: str) -> str: + # Extract directory path and filename + clean_path = path.lstrip('/') + filename = os.path.basename(clean_path) + directory_path = os.path.dirname(clean_path) - def get_object_key_from_path(self, remote_name: str, path: str) -> str: - # Create a key based on the API path for direct access - path_hash = hashlib.sha256(path.encode()).hexdigest()[:16] - filename = os.path.basename(path) - return f"{remote_name}/{path_hash}/{filename}" + # Hash the directory path to keep keys manageable while preserving remote structure + if directory_path: + path_hash = hashlib.sha256(directory_path.encode()).hexdigest()[:16] + return f"{remote_name}/{path_hash}/{filename}" + else: + # If no directory, just use remote and filename + return f"{remote_name}/{filename}" def exists(self, key: str) -> bool: try: