diff --git a/internal/database/artifacts.go b/internal/database/artifacts.go index fcd6e21..68acd01 100644 --- a/internal/database/artifacts.go +++ b/internal/database/artifacts.go @@ -109,16 +109,22 @@ func (db *DB) InsertAccessLog(ctx context.Context, remoteName, path string, cach return err } -func (db *DB) FindOrphanedBlobs(ctx context.Context) ([]models.Blob, error) { +// FindOrphanedBlobs returns blobs no longer referenced by any artifact or +// local file, restricted to those created before now()-minAge. The age cutoff +// is a grace period that avoids a TOCTOU race with in-flight dedup uploads, +// which insert the blob row before the referencing artifact/local_files row. +func (db *DB) FindOrphanedBlobs(ctx context.Context, minAge time.Duration) ([]models.Blob, error) { + cutoff := time.Now().Add(-minAge) rows, err := db.Pool.Query(ctx, ` SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at FROM blobs b - WHERE b.content_hash NOT IN ( + WHERE b.created_at < $1 + AND b.content_hash NOT IN ( SELECT content_hash FROM artifacts UNION SELECT content_hash FROM local_files ) - `) + `, cutoff) if err != nil { return nil, err } diff --git a/internal/gc/gc.go b/internal/gc/gc.go index d024334..fbf0e36 100644 --- a/internal/gc/gc.go +++ b/internal/gc/gc.go @@ -9,6 +9,11 @@ import ( "git.unkin.net/unkin/artifactapi/internal/storage" ) +// blobGracePeriod is how old an orphaned blob must be before GC will delete +// it. This avoids racing in-flight dedup uploads that insert the blob row +// before the referencing artifact/local_files row exists. +const blobGracePeriod = 1 * time.Hour + type Collector struct { db *database.DB store *storage.S3 @@ -38,7 +43,7 @@ func (c *Collector) Run(ctx context.Context) { func (c *Collector) sweep(ctx context.Context) { start := time.Now() - orphaned, err := c.db.FindOrphanedBlobs(ctx) + orphaned, err := c.db.FindOrphanedBlobs(ctx, blobGracePeriod) if err != nil { slog.Error("gc: find orphaned blobs", "error", err) return