benvin/path-based-storage #6
4
.gitignore
vendored
4
.gitignore
vendored
@ -45,3 +45,7 @@ uv.lock
|
|||||||
|
|
||||||
# Docker volumes
|
# Docker volumes
|
||||||
minio_data/
|
minio_data/
|
||||||
|
|
||||||
|
# Local configuration overrides
|
||||||
|
docker-compose.yml
|
||||||
|
ca-bundle.pem
|
||||||
|
|||||||
@ -1,86 +0,0 @@
|
|||||||
version: '3.8'
|
|
||||||
|
|
||||||
services:
|
|
||||||
artifactapi:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
no_cache: true
|
|
||||||
ports:
|
|
||||||
- "8000:8000"
|
|
||||||
environment:
|
|
||||||
- CONFIG_PATH=/app/remotes.yaml
|
|
||||||
- DBHOST=postgres
|
|
||||||
- DBPORT=5432
|
|
||||||
- DBUSER=artifacts
|
|
||||||
- DBPASS=artifacts123
|
|
||||||
- DBNAME=artifacts
|
|
||||||
- REDIS_URL=redis://redis:6379
|
|
||||||
- MINIO_ENDPOINT=minio:9000
|
|
||||||
- MINIO_ACCESS_KEY=minioadmin
|
|
||||||
- MINIO_SECRET_KEY=minioadmin
|
|
||||||
- MINIO_BUCKET=artifacts
|
|
||||||
- MINIO_SECURE=false
|
|
||||||
depends_on:
|
|
||||||
postgres:
|
|
||||||
condition: service_healthy
|
|
||||||
redis:
|
|
||||||
condition: service_healthy
|
|
||||||
minio:
|
|
||||||
condition: service_healthy
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 3
|
|
||||||
|
|
||||||
minio:
|
|
||||||
image: minio/minio:latest
|
|
||||||
ports:
|
|
||||||
- "9000:9000"
|
|
||||||
- "9001:9001"
|
|
||||||
environment:
|
|
||||||
MINIO_ROOT_USER: minioadmin
|
|
||||||
MINIO_ROOT_PASSWORD: minioadmin
|
|
||||||
command: server /data --console-address ":9001"
|
|
||||||
volumes:
|
|
||||||
- minio_data:/data
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 20s
|
|
||||||
retries: 3
|
|
||||||
|
|
||||||
redis:
|
|
||||||
image: redis:7-alpine
|
|
||||||
ports:
|
|
||||||
- "6379:6379"
|
|
||||||
volumes:
|
|
||||||
- redis_data:/data
|
|
||||||
command: redis-server --save 20 1
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "redis-cli", "ping"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 3
|
|
||||||
|
|
||||||
postgres:
|
|
||||||
image: postgres:15-alpine
|
|
||||||
ports:
|
|
||||||
- "5432:5432"
|
|
||||||
environment:
|
|
||||||
POSTGRES_DB: artifacts
|
|
||||||
POSTGRES_USER: artifacts
|
|
||||||
POSTGRES_PASSWORD: artifacts123
|
|
||||||
volumes:
|
|
||||||
- postgres_data:/var/lib/postgresql/data
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "pg_isready -U artifacts -d artifacts"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 3
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
minio_data:
|
|
||||||
redis_data:
|
|
||||||
postgres_data:
|
|
||||||
@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "artifactapi"
|
name = "artifactapi"
|
||||||
version = "2.0.2"
|
version = "2.0.4"
|
||||||
description = "Generic artifact caching system with support for various package managers"
|
description = "Generic artifact caching system with support for various package managers"
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
@ -40,4 +40,11 @@ dev = [
|
|||||||
"isort>=5.12.0",
|
"isort>=5.12.0",
|
||||||
"mypy>=1.6.0",
|
"mypy>=1.6.0",
|
||||||
"ruff>=0.1.0",
|
"ruff>=0.1.0",
|
||||||
|
"bump-my-version>=1.2.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[tool.bumpversion]
|
||||||
|
current_version = "2.0.4"
|
||||||
|
commit = true
|
||||||
|
tag = true
|
||||||
|
message = "Bump version: {current_version} → {new_version}"
|
||||||
|
|||||||
@ -76,10 +76,8 @@ class RedisCache:
|
|||||||
if remote_config:
|
if remote_config:
|
||||||
base_url = remote_config.get("base_url")
|
base_url = remote_config.get("base_url")
|
||||||
if base_url:
|
if base_url:
|
||||||
# Construct URL the same way as construct_remote_url
|
# Use hierarchical path-based key (same as cache_single_artifact)
|
||||||
remote_url = f"{base_url.rstrip('/')}/{path}"
|
s3_key = storage.get_object_key(remote_name, path)
|
||||||
# Use URL-based key (same as cache_single_artifact)
|
|
||||||
s3_key = storage.get_object_key(remote_url)
|
|
||||||
if storage.exists(s3_key):
|
if storage.exists(s3_key):
|
||||||
storage.client.delete_object(Bucket=storage.bucket, Key=s3_key)
|
storage.client.delete_object(Bucket=storage.bucket, Key=s3_key)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|||||||
@ -9,6 +9,13 @@ from fastapi.responses import PlainTextResponse, JSONResponse
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
||||||
|
|
||||||
|
try:
|
||||||
|
from importlib.metadata import version
|
||||||
|
__version__ = version("artifactapi")
|
||||||
|
except ImportError:
|
||||||
|
# Fallback for development when package isn't installed
|
||||||
|
__version__ = "dev"
|
||||||
|
|
||||||
from .config import ConfigManager
|
from .config import ConfigManager
|
||||||
from .database import DatabaseManager
|
from .database import DatabaseManager
|
||||||
from .storage import S3Storage
|
from .storage import S3Storage
|
||||||
@ -28,7 +35,7 @@ logging.basicConfig(
|
|||||||
)
|
)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
app = FastAPI(title="Artifact Storage API", version="2.0.2")
|
app = FastAPI(title="Artifact Storage API", version=__version__)
|
||||||
|
|
||||||
# Initialize components using config
|
# Initialize components using config
|
||||||
config_path = os.environ.get("CONFIG_PATH")
|
config_path = os.environ.get("CONFIG_PATH")
|
||||||
@ -181,11 +188,8 @@ async def check_artifact_patterns(
|
|||||||
|
|
||||||
|
|
||||||
async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
|
async def cache_single_artifact(url: str, remote_name: str, path: str) -> dict:
|
||||||
# Check if using URL-based key or path-based key
|
# Use hierarchical path-based key
|
||||||
if url.startswith("http"):
|
key = storage.get_object_key(remote_name, path)
|
||||||
key = storage.get_object_key(url)
|
|
||||||
else:
|
|
||||||
key = storage.get_object_key_from_path(remote_name, path)
|
|
||||||
|
|
||||||
if storage.exists(key):
|
if storage.exists(key):
|
||||||
logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})")
|
logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})")
|
||||||
@ -267,15 +271,10 @@ async def get_artifact(remote_name: str, path: str):
|
|||||||
# Construct the remote URL
|
# Construct the remote URL
|
||||||
remote_url = await construct_remote_url(remote_name, path)
|
remote_url = await construct_remote_url(remote_name, path)
|
||||||
|
|
||||||
# Check if artifact is already cached (try both URL and path-based keys)
|
# Check if artifact is already cached
|
||||||
url_key = storage.get_object_key(remote_url)
|
cached_key = storage.get_object_key(remote_name, path)
|
||||||
path_key = storage.get_object_key_from_path(remote_name, path)
|
if not storage.exists(cached_key):
|
||||||
|
|
||||||
cached_key = None
|
cached_key = None
|
||||||
if storage.exists(url_key):
|
|
||||||
cached_key = url_key
|
|
||||||
elif storage.exists(path_key):
|
|
||||||
cached_key = path_key
|
|
||||||
|
|
||||||
# For index files, check Redis TTL validity
|
# For index files, check Redis TTL validity
|
||||||
filename = os.path.basename(path)
|
filename = os.path.basename(path)
|
||||||
@ -355,7 +354,7 @@ async def get_artifact(remote_name: str, path: str):
|
|||||||
|
|
||||||
# Now return the cached artifact
|
# Now return the cached artifact
|
||||||
try:
|
try:
|
||||||
cache_key = storage.get_object_key(remote_url)
|
cache_key = storage.get_object_key(remote_name, path)
|
||||||
artifact_data = storage.download_object(cache_key)
|
artifact_data = storage.download_object(cache_key)
|
||||||
filename = os.path.basename(path)
|
filename = os.path.basename(path)
|
||||||
|
|
||||||
@ -377,7 +376,7 @@ async def get_artifact(remote_name: str, path: str):
|
|||||||
metrics.record_cache_miss(remote_name, len(artifact_data))
|
metrics.record_cache_miss(remote_name, len(artifact_data))
|
||||||
|
|
||||||
# Record artifact mapping in database
|
# Record artifact mapping in database
|
||||||
cache_key = storage.get_object_key(remote_url)
|
cache_key = storage.get_object_key(remote_name, path)
|
||||||
database.record_artifact_mapping(
|
database.record_artifact_mapping(
|
||||||
cache_key, remote_name, path, len(artifact_data)
|
cache_key, remote_name, path, len(artifact_data)
|
||||||
)
|
)
|
||||||
@ -625,7 +624,11 @@ async def list_cached_artifacts(
|
|||||||
|
|
||||||
cached_artifacts = []
|
cached_artifacts = []
|
||||||
for url in matching_urls:
|
for url in matching_urls:
|
||||||
key = storage.get_object_key(url)
|
# Extract path from URL for hierarchical key generation
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
parsed = urlparse(url)
|
||||||
|
path = parsed.path
|
||||||
|
key = storage.get_object_key(remote, path)
|
||||||
if storage.exists(key):
|
if storage.exists(key):
|
||||||
cached_artifacts.append(
|
cached_artifacts.append(
|
||||||
{"url": url, "cached_url": storage.get_url(key), "key": key}
|
{"url": url, "cached_url": storage.get_url(key), "key": key}
|
||||||
|
|||||||
@ -1,6 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
import hashlib
|
import hashlib
|
||||||
from urllib.parse import urlparse
|
|
||||||
import boto3
|
import boto3
|
||||||
from botocore.config import Config
|
from botocore.config import Config
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
@ -55,17 +54,19 @@ class S3Storage:
|
|||||||
except ClientError:
|
except ClientError:
|
||||||
self.client.create_bucket(Bucket=self.bucket)
|
self.client.create_bucket(Bucket=self.bucket)
|
||||||
|
|
||||||
def get_object_key(self, url: str) -> str:
|
def get_object_key(self, remote_name: str, path: str) -> str:
|
||||||
url_hash = hashlib.sha256(url.encode()).hexdigest()[:16]
|
# Extract directory path and filename
|
||||||
parsed = urlparse(url)
|
clean_path = path.lstrip('/')
|
||||||
filename = os.path.basename(parsed.path)
|
filename = os.path.basename(clean_path)
|
||||||
return f"{parsed.netloc}/{url_hash}/{filename}"
|
directory_path = os.path.dirname(clean_path)
|
||||||
|
|
||||||
def get_object_key_from_path(self, remote_name: str, path: str) -> str:
|
# Hash the directory path to keep keys manageable while preserving remote structure
|
||||||
# Create a key based on the API path for direct access
|
if directory_path:
|
||||||
path_hash = hashlib.sha256(path.encode()).hexdigest()[:16]
|
path_hash = hashlib.sha256(directory_path.encode()).hexdigest()[:16]
|
||||||
filename = os.path.basename(path)
|
|
||||||
return f"{remote_name}/{path_hash}/{filename}"
|
return f"{remote_name}/{path_hash}/{filename}"
|
||||||
|
else:
|
||||||
|
# If no directory, just use remote and filename
|
||||||
|
return f"{remote_name}/{filename}"
|
||||||
|
|
||||||
def exists(self, key: str) -> bool:
|
def exists(self, key: str) -> bool:
|
||||||
try:
|
try:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user