Compare commits

..

1 Commit

Author SHA1 Message Date
unkinben 8fc1635d11 fix: add a grace period before GC deletes orphaned blobs
ci/woodpecker/pr/pre-commit Pipeline was successful
ci/woodpecker/pr/test Pipeline was successful
ci/woodpecker/pr/build Pipeline was successful
FindOrphanedBlobs returned any unreferenced blob, so a concurrent dedup
upload (which inserts the blob row before its artifact/local_files row)
could have its S3 object deleted mid-flight. Restrict collection to blobs
older than a 1h grace period.

Refs #71
2026-07-02 00:30:11 +10:00
2 changed files with 15 additions and 4 deletions
+9 -3
View File
@@ -109,16 +109,22 @@ func (db *DB) InsertAccessLog(ctx context.Context, remoteName, path string, cach
return err return err
} }
func (db *DB) FindOrphanedBlobs(ctx context.Context) ([]models.Blob, error) { // FindOrphanedBlobs returns blobs no longer referenced by any artifact or
// local file, restricted to those created before now()-minAge. The age cutoff
// is a grace period that avoids a TOCTOU race with in-flight dedup uploads,
// which insert the blob row before the referencing artifact/local_files row.
func (db *DB) FindOrphanedBlobs(ctx context.Context, minAge time.Duration) ([]models.Blob, error) {
cutoff := time.Now().Add(-minAge)
rows, err := db.Pool.Query(ctx, ` rows, err := db.Pool.Query(ctx, `
SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at
FROM blobs b FROM blobs b
WHERE b.content_hash NOT IN ( WHERE b.created_at < $1
AND b.content_hash NOT IN (
SELECT content_hash FROM artifacts SELECT content_hash FROM artifacts
UNION UNION
SELECT content_hash FROM local_files SELECT content_hash FROM local_files
) )
`) `, cutoff)
if err != nil { if err != nil {
return nil, err return nil, err
} }
+6 -1
View File
@@ -9,6 +9,11 @@ import (
"git.unkin.net/unkin/artifactapi/internal/storage" "git.unkin.net/unkin/artifactapi/internal/storage"
) )
// blobGracePeriod is the minimum age an orphaned blob must reach before GC
// is allowed to delete it. sweep hands this value to FindOrphanedBlobs as the
// age cutoff, which keeps GC from racing in-flight dedup uploads: those insert
// the blob row first and only afterwards the artifact/local_files row that
// references it.
const blobGracePeriod = time.Hour
type Collector struct { type Collector struct {
db *database.DB db *database.DB
store *storage.S3 store *storage.S3
@@ -38,7 +43,7 @@ func (c *Collector) Run(ctx context.Context) {
func (c *Collector) sweep(ctx context.Context) { func (c *Collector) sweep(ctx context.Context) {
start := time.Now() start := time.Now()
orphaned, err := c.db.FindOrphanedBlobs(ctx) orphaned, err := c.db.FindOrphanedBlobs(ctx, blobGracePeriod)
if err != nil { if err != nil {
slog.Error("gc: find orphaned blobs", "error", err) slog.Error("gc: find orphaned blobs", "error", err)
return return