Files
artifactapi/internal/gc/gc.go
T
unkinben 649f89f58b
ci/woodpecker/tag/docker Pipeline was successful
fix: make local docker uploads replica-independent (#104)
## Why

Chunked blob uploads kept the in-progress session in **process memory** keyed by upload UUID, so the `POST`/`PATCH`/`PUT` of a single `docker push` had to land on the same replica. The API runs at `minReplicas: 2` with no session affinity (see argocd-apps `api-hpa.yaml`), so a real push — which streams the layer via `PATCH` then finalises with `PUT` — intermittently 404s with `BLOB_UPLOAD_UNKNOWN` when a chunk hits a replica that never saw the `POST`. This was flagged when the local docker registry landed (#103).

## Changes

- Stage chunked uploads in object storage under `uploads/<uuid>` instead of an in-memory temp file. The UUID travels in the `Location` URL handed to the client, so any replica reconstructs the staging key with no shared in-process state. Finalise streams the staged bytes plus any trailing `PUT` body through the CAS in one pass; monolithic uploads are unchanged.
- Support `DELETE` of an in-progress upload (cancel) by dropping its staging object.
- Reap abandoned staging objects in the GC (`uploads/` older than 24h) via a new `S3.ListStaleObjects`, so cancelled/interrupted pushes don't leak.

## Verification

- Split a single push across **two instances sharing one Postgres+MinIO**: `POST`→A, `PATCH`→B, `PUT`→A finalises with the correct digest, and the blob pulls back **byte-identical from both** replicas. Config-blob and manifest pushes split the same way succeed; `tags/list` is correct. (Pre-fix, the cross-replica `PATCH` 404s.)
- `scripts/docker-e2e.sh` still passes (incl. `TestLocalDockerPushPull`); unit tests + `go vet` clean.

Reviewed-on: #104
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-07-05 17:39:49 +10:00

101 lines
2.6 KiB
Go

package gc
import (
"context"
"log/slog"
"time"
"git.unkin.net/unkin/artifactapi/internal/database"
"git.unkin.net/unkin/artifactapi/internal/storage"
)
// Grace periods the collector applies before it deletes anything.
const (
	// blobGracePeriod is the minimum age of an orphaned blob before GC removes
	// it. Dedup uploads insert the blob row before the artifact/local_files
	// row that references it, so a younger orphan may belong to an upload that
	// is still in flight.
	blobGracePeriod = time.Hour

	// uploadGracePeriod is the idle time after which a docker blob-upload
	// staging object (uploads/<uuid>) counts as an abandoned push and is
	// reaped. It is deliberately generous so a slow but live push is never
	// cut off mid-flight.
	uploadGracePeriod = 24 * time.Hour
)
// Collector is the garbage collector: on a fixed interval it deletes orphaned
// blobs and stale docker upload staging objects.
type Collector struct {
	db       *database.DB  // queried for orphaned blobs; blob rows are deleted here
	store    *storage.S3   // object store holding the blobs and staging objects
	interval time.Duration // time between sweeps
}
// New returns a Collector that works against db and store and sweeps once per
// interval. It only records its arguments; no sweeping happens until Run is
// called.
func New(db *database.DB, store *storage.S3, interval time.Duration) *Collector {
	collector := new(Collector)
	collector.db = db
	collector.store = store
	collector.interval = interval
	return collector
}
// Run performs one sweep per c.interval and blocks until ctx is cancelled.
// The first sweep happens one interval after Run starts, not immediately.
//
// time.NewTicker panics on a non-positive duration, so a zero or negative
// interval is rejected up front: Run logs an error and returns without
// sweeping instead of crashing the process.
func (c *Collector) Run(ctx context.Context) {
	if c.interval <= 0 {
		slog.Error("gc disabled: interval must be positive", "interval", c.interval)
		return
	}

	slog.Info("gc started", "interval", c.interval)

	ticker := time.NewTicker(c.interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			slog.Info("gc stopped")
			return
		case <-ticker.C:
			c.sweep(ctx)
		}
	}
}
// sweep runs one GC pass: it first reaps stale upload staging objects, then
// deletes every orphaned blob older than blobGracePeriod from both the object
// store and the database.
//
// Failures are logged and never returned. A blob whose object or row cannot
// be deleted is skipped and not counted. If ctx is cancelled the pass stops
// early rather than attempting (and logging a failure for) each remaining
// blob.
func (c *Collector) sweep(ctx context.Context) {
	start := time.Now()

	c.sweepUploads(ctx)
	if ctx.Err() != nil {
		// Shutting down: skip the query instead of logging a spurious error.
		return
	}

	orphaned, err := c.db.FindOrphanedBlobs(ctx, blobGracePeriod)
	if err != nil {
		slog.Error("gc: find orphaned blobs", "error", err)
		return
	}
	if len(orphaned) == 0 {
		// Nothing to do; stay quiet so idle sweeps do not fill the log.
		return
	}

	deleted := 0
	for _, blob := range orphaned {
		if ctx.Err() != nil {
			break
		}
		// Remove the object first: if that fails the row is left in place,
		// so the blob stays recorded in the database.
		if err := c.store.Delete(ctx, blob.S3Key); err != nil {
			slog.Warn("gc: delete s3 object", "key", blob.S3Key, "error", err)
			continue
		}
		if err := c.db.DeleteBlob(ctx, blob.ContentHash); err != nil {
			slog.Warn("gc: delete blob row", "hash", blob.ContentHash, "error", err)
			continue
		}
		deleted++
	}

	slog.Info("gc sweep complete",
		"orphaned_found", len(orphaned),
		"deleted", deleted,
		"duration_ms", time.Since(start).Milliseconds(),
	)
}
// sweepUploads reaps docker blob-upload staging objects under "uploads/" that
// have been abandoned for longer than uploadGracePeriod (cancelled or
// interrupted pushes that never finalised).
//
// Failures are logged and never returned; an object that cannot be deleted is
// skipped and not counted. If ctx is cancelled the loop stops early rather
// than attempting (and logging a failure for) each remaining object.
func (c *Collector) sweepUploads(ctx context.Context) {
	cutoff := time.Now().Add(-uploadGracePeriod)

	stale, err := c.store.ListStaleObjects(ctx, "uploads/", cutoff)
	if err != nil {
		slog.Error("gc: list stale uploads", "error", err)
		return
	}

	reaped := 0
	for _, key := range stale {
		if ctx.Err() != nil {
			break
		}
		if err := c.store.Delete(ctx, key); err != nil {
			slog.Warn("gc: delete stale upload", "key", key, "error", err)
			continue
		}
		reaped++
	}

	if reaped > 0 {
		slog.Info("gc: reaped stale docker uploads", "count", reaped)
	}
}