Compare commits

..

6 Commits

Author SHA1 Message Date
unkinben 8fc1635d11 fix: add a grace period before GC deletes orphaned blobs
ci/woodpecker/pr/pre-commit Pipeline was successful
ci/woodpecker/pr/test Pipeline was successful
ci/woodpecker/pr/build Pipeline was successful
FindOrphanedBlobs returned any unreferenced blob, so a concurrent dedup
upload (which inserts the blob row before its artifact/local_files row)
could have its S3 object deleted mid-flight. Restrict collection to blobs
older than a 1h grace period.

Refs #71
2026-07-02 00:30:11 +10:00
unkinben 8d9bc1c422 feat: add bandwidth saved stat to dashboard (#65)
ci/woodpecker/tag/docker Pipeline was successful
Shows total bytes served from cache (instead of upstream) over the last 30 days. Queries `SUM(size_bytes) WHERE cache_hit = TRUE` from access_log.

Reviewed-on: #65
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-06-27 22:18:02 +10:00
unkinben 30b7cef026 fix: strip base URL path prefix from helm chart download URLs (#64)
ci/woodpecker/tag/docker Pipeline was successful
When a helm repo base URL includes a path component (e.g. \`stakater.github.io/stakater-charts\`), the merger was extracting the full URL path (\`stakater-charts/reloader-2.2.8.tgz\`) and the proxy then constructed \`base_url/stakater-charts/reloader-2.2.8.tgz\` = double path = 404.

Fix: \`extractPathRelativeToBase()\` strips the shared base path prefix so only the filename portion is used as the proxy path.
Reviewed-on: #64
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-06-27 08:02:52 +10:00
unkinben 603be5b989 fix: report actual version instead of hardcoded 3.0.0-dev (#63)
ci/woodpecker/tag/docker Pipeline was successful
The / endpoint was hardcoded to return 3.0.0-dev. Now uses the git tag version set via ldflags at build time.

Reviewed-on: #63
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-06-27 00:51:26 +10:00
unkinben 9eba49500c feat: forward Accept header and fix Content-Type for Docker proxying (#62)
## Problems
1. Docker daemon sends specific Accept headers to negotiate manifest format, but the proxy dropped them — registries defaulted to OCI format, causing "mediaType should be manifest.v2+json not oci.image.index" errors
2. Upstream Content-Type was only used when the provider returned "application/octet-stream" — Docker manifests got the wrong Content-Type

## Fixes
- Forward client Accept header to upstream (both initial request and Bearer token retry)
- Always prefer upstream Content-Type when present
- Fetch signature now accepts variadic clientHeaders for backwards compat

## E2E tested
- DockerHub: redis:7-alpine, alpine:3 — skopeo inspect OK
- GHCR: OCI-only images work with docker pull (GHCR 404s Docker v2 Accept, which is expected)
- Quay: prometheus/node-exporter — skopeo inspect OK

Reviewed-on: #62
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-06-27 00:45:23 +10:00
unkinben 0083d67272 fix: nginx config for UI serving under base path (#61)
Vite's \`base: /ui\` makes HTML reference \`/ui/assets/...\` but files are at \`/usr/share/nginx/html/assets/\` (no \`ui/\` subdir). The previous \`location /ui { try_files ... }\` couldn't find the files.

Fix: rewrite strips the base path prefix before try_files, so \`/ui/assets/foo.js\` resolves to \`/usr/share/nginx/html/assets/foo.js\`.
Reviewed-on: #61
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-06-27 00:43:45 +10:00
13 changed files with 81 additions and 16 deletions
+2
View File
@@ -8,6 +8,8 @@ steps:
settings:
registry: git.unkin.net
repo: git.unkin.net/unkin/artifactapi
build_args:
VERSION: ${CI_COMMIT_TAG}
username: droneci
password:
from_secret: DRONECI_PASSWORD
+2 -1
View File
@@ -9,7 +9,8 @@ RUN go mod download
COPY . .
RUN CGO_ENABLED=0 go build -ldflags="-s -w" -o artifactapi ./cmd/artifactapi
ARG VERSION=dev
RUN CGO_ENABLED=0 go build -ldflags="-s -w -X main.version=${VERSION}" -o artifactapi ./cmd/artifactapi
FROM gcr.io/distroless/static-debian12:nonroot
+1 -1
View File
@@ -12,7 +12,7 @@ check-go:
fi
build: check-go tidy
go build -ldflags="-s -w" -o $(BINARY) ./cmd/artifactapi
go build -ldflags="-s -w -X main.version=$(VERSION)" -o $(BINARY) ./cmd/artifactapi
test: check-go
go test -race -count=1 ./pkg/... ./internal/...
+3 -1
View File
@@ -13,6 +13,8 @@ import (
"git.unkin.net/unkin/artifactapi/internal/tui"
)
var version = "dev"
func main() {
if len(os.Args) > 1 && os.Args[1] == "tui" {
endpoint := os.Getenv("ARTIFACTAPI_ENDPOINT")
@@ -42,7 +44,7 @@ func main() {
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer cancel()
srv, err := server.New(cfg)
srv, err := server.New(cfg, version)
if err != nil {
slog.Error("failed to create server", "error", err)
os.Exit(1)
+1 -1
View File
@@ -67,7 +67,7 @@ func (h *ProxyHandler) handleProxy(w http.ResponseWriter, r *http.Request) {
return
}
result, err := h.engine.Fetch(r.Context(), *remote, path, prov)
result, err := h.engine.Fetch(r.Context(), *remote, path, prov, r.Header)
if err != nil {
var proxyErr *proxy.ProxyError
if errors.As(err, &proxyErr) {
+9 -3
View File
@@ -109,16 +109,22 @@ func (db *DB) InsertAccessLog(ctx context.Context, remoteName, path string, cach
return err
}
func (db *DB) FindOrphanedBlobs(ctx context.Context) ([]models.Blob, error) {
// FindOrphanedBlobs returns blobs no longer referenced by any artifact or
// local file, restricted to those created before now()-minAge. The age cutoff
// is a grace period that avoids a TOCTOU race with in-flight dedup uploads,
// which insert the blob row before the referencing artifact/local_files row.
func (db *DB) FindOrphanedBlobs(ctx context.Context, minAge time.Duration) ([]models.Blob, error) {
cutoff := time.Now().Add(-minAge)
rows, err := db.Pool.Query(ctx, `
SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at
FROM blobs b
WHERE b.content_hash NOT IN (
WHERE b.created_at < $1
AND b.content_hash NOT IN (
SELECT content_hash FROM artifacts
UNION
SELECT content_hash FROM local_files
)
`)
`, cutoff)
if err != nil {
return nil, err
}
+9
View File
@@ -30,6 +30,15 @@ func (db *DB) GetOverviewStats(ctx context.Context) (*models.OverviewStats, erro
return nil, err
}
err = db.Pool.QueryRow(ctx, `
SELECT COALESCE(SUM(size_bytes), 0)
FROM access_log
WHERE cache_hit = TRUE AND created_at > NOW() - INTERVAL '30 days'
`).Scan(&stats.BandwidthSaved30d)
if err != nil {
return nil, err
}
return &stats, nil
}
+6 -1
View File
@@ -9,6 +9,11 @@ import (
"git.unkin.net/unkin/artifactapi/internal/storage"
)
// blobGracePeriod is how old an orphaned blob must be before GC will delete
// it. This avoids racing in-flight dedup uploads that insert the blob row
// before the referencing artifact/local_files row exists.
const blobGracePeriod = 1 * time.Hour
type Collector struct {
db *database.DB
store *storage.S3
@@ -38,7 +43,7 @@ func (c *Collector) Run(ctx context.Context) {
func (c *Collector) sweep(ctx context.Context) {
start := time.Now()
orphaned, err := c.db.FindOrphanedBlobs(ctx)
orphaned, err := c.db.FindOrphanedBlobs(ctx, blobGracePeriod)
if err != nil {
slog.Error("gc: find orphaned blobs", "error", err)
return
+19 -4
View File
@@ -44,7 +44,7 @@ type FetchResult struct {
Source string // "cache" or "remote"
}
func (e *Engine) Fetch(ctx context.Context, remote models.Remote, path string, prov provider.Provider) (*FetchResult, error) {
func (e *Engine) Fetch(ctx context.Context, remote models.Remote, path string, prov provider.Provider, clientHeaders ...http.Header) (*FetchResult, error) {
classifier := NewClassifier(prov)
class := classifier.Classify(remote, path)
@@ -105,8 +105,13 @@ func (e *Engine) Fetch(ctx context.Context, remote models.Remote, path string, p
}
}
var fwdHeaders http.Header
if len(clientHeaders) > 0 && clientHeaders[0] != nil {
fwdHeaders = clientHeaders[0]
}
start := time.Now()
result, err := e.fetchFromUpstream(ctx, remote, path, prov, class, ttl)
result, err := e.fetchFromUpstream(ctx, remote, path, prov, class, ttl, fwdHeaders)
upstreamMS := int(time.Since(start).Milliseconds())
if err != nil {
if remote.StaleOnError && isNetworkError(err) {
@@ -126,7 +131,7 @@ func (e *Engine) Fetch(ctx context.Context, remote models.Remote, path string, p
return result, nil
}
func (e *Engine) fetchFromUpstream(ctx context.Context, remote models.Remote, path string, prov provider.Provider, class Classification, ttl time.Duration) (*FetchResult, error) {
func (e *Engine) fetchFromUpstream(ctx context.Context, remote models.Remote, path string, prov provider.Provider, class Classification, ttl time.Duration, clientHeaders http.Header) (*FetchResult, error) {
url := prov.UpstreamURL(remote, path)
authHeaders, err := prov.AuthHeaders(ctx, remote)
@@ -143,6 +148,11 @@ func (e *Engine) fetchFromUpstream(ctx context.Context, remote models.Remote, pa
req.Header.Add(k, v)
}
}
if clientHeaders != nil {
if accept := clientHeaders.Get("Accept"); accept != "" {
req.Header.Set("Accept", accept)
}
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
@@ -155,6 +165,11 @@ func (e *Engine) fetchFromUpstream(ctx context.Context, remote models.Remote, pa
if err == nil && token != "" {
req2, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
req2.Header.Set("Authorization", "Bearer "+token)
if clientHeaders != nil {
if accept := clientHeaders.Get("Accept"); accept != "" {
req2.Header.Set("Accept", accept)
}
}
resp, err = http.DefaultClient.Do(req2)
if err != nil {
return nil, &UpstreamError{Err: err}
@@ -184,7 +199,7 @@ func (e *Engine) fetchFromUpstream(ctx context.Context, remote models.Remote, pa
}
contentType := prov.ContentType(path)
if ct := resp.Header.Get("Content-Type"); ct != "" && contentType == "application/octet-stream" {
if ct := resp.Header.Get("Content-Type"); ct != "" {
contentType = ct
}
+4 -2
View File
@@ -35,6 +35,7 @@ import (
type Server struct {
cfg *config.Config
version string
router chi.Router
db *database.DB
cache *cache.Redis
@@ -45,7 +46,7 @@ type Server struct {
gc *gc.Collector
}
func New(cfg *config.Config) (*Server, error) {
func New(cfg *config.Config, version string) (*Server, error) {
db, err := database.New(cfg.DatabaseDSN())
if err != nil {
return nil, fmt.Errorf("database: %w", err)
@@ -68,6 +69,7 @@ func New(cfg *config.Config) (*Server, error) {
s := &Server{
cfg: cfg,
version: version,
db: db,
cache: redis,
store: s3,
@@ -138,7 +140,7 @@ func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
func (s *Server) handleRoot(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
fmt.Fprint(w, `{"name":"artifactapi","version":"3.0.0-dev"}`)
fmt.Fprintf(w, `{"name":"artifactapi","version":"%s"}`, s.version)
}
func (s *Server) newHTTPServer() *http.Server {
+14 -1
View File
@@ -65,11 +65,12 @@ func (m *HelmMerger) MergeIndexes(members []MemberIndex, proxyBaseURL string) ([
if baseHost != "" && extractHost(u) != baseHost {
continue
}
relPath := extractPathRelativeToBase(u, member.BaseURL)
ver.URLs[i] = fmt.Sprintf("%s/api/v1/%s/%s/%s",
strings.TrimRight(proxyBaseURL, "/"),
routePrefix,
member.RemoteName,
extractPath(u))
relPath)
} else {
ver.URLs[i] = fmt.Sprintf("%s/api/v1/%s/%s/%s",
strings.TrimRight(proxyBaseURL, "/"),
@@ -102,6 +103,18 @@ func extractHost(rawURL string) string {
return rest[:slashIdx]
}
func extractPathRelativeToBase(rawURL, baseURL string) string {
fullPath := extractPath(rawURL)
basePath := extractPath(baseURL)
if basePath != "" {
basePath = strings.TrimRight(basePath, "/") + "/"
if strings.HasPrefix(fullPath, basePath) {
return fullPath[len(basePath):]
}
}
return fullPath
}
func extractPath(rawURL string) string {
idx := strings.Index(rawURL, "://")
if idx == -1 {
+6 -1
View File
@@ -5,7 +5,12 @@ server {
root /usr/share/nginx/html;
index index.html;
location ${BASE_PATH} {
location ${BASE_PATH}/ {
rewrite ^${BASE_PATH}(/.*)$ $1 break;
try_files $uri $uri/ /index.html;
}
location = ${BASE_PATH} {
return 301 ${BASE_PATH}/;
}
}
+5
View File
@@ -50,6 +50,11 @@ export function Dashboard() {
value={formatNumber(stats.total_blobs_deduped)}
sub="shared blobs"
/>
<StatsCard
label="Bandwidth Saved"
value={formatBytes(stats.bandwidth_saved_30d)}
sub="last 30 days"
/>
</div>
{health && (