fix: GC has no grace period (TOCTOU with dedup uploads) (#86)

Fixes #71

## Why
`FindOrphanedBlobs` returned any blob not currently referenced. Because CAS dedups (the blob row can exist before its artifact/local_files row is written), a concurrent upload reusing an existing hash could have its S3 object deleted mid-flight by the GC.

## Changes
- `FindOrphanedBlobs` now takes a `minAge` and only returns blobs with `created_at < now()-minAge`.
- The collector passes a 1h `blobGracePeriod`.

## Validation
- `go test ./internal/gc/...` and `make e2e` pass.

---------

Co-authored-by: BenVincent <benvin@main.unkin.net>
Reviewed-on: #86
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
This commit was merged in pull request #86.
This commit is contained in:
2026-07-02 22:43:18 +10:00
committed by BenVincent
parent 7e07eaa758
commit e7c9387bcc
2 changed files with 15 additions and 4 deletions
+9 -3
View File
@@ -138,16 +138,22 @@ func (db *DB) InsertAccessLogBatch(ctx context.Context, entries []AccessLogEntry
return err
}
func (db *DB) FindOrphanedBlobs(ctx context.Context) ([]models.Blob, error) {
// FindOrphanedBlobs returns blobs no longer referenced by any artifact or
// local file, restricted to those created before now()-minAge. The age cutoff
// is a grace period that avoids a TOCTOU race with in-flight dedup uploads,
// which insert the blob row before the referencing artifact/local_files row.
func (db *DB) FindOrphanedBlobs(ctx context.Context, minAge time.Duration) ([]models.Blob, error) {
cutoff := time.Now().Add(-minAge)
rows, err := db.Pool.Query(ctx, `
SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at
FROM blobs b
WHERE b.content_hash NOT IN (
WHERE b.created_at < $1
AND b.content_hash NOT IN (
SELECT content_hash FROM artifacts
UNION
SELECT content_hash FROM local_files
)
`)
`, cutoff)
if err != nil {
return nil, err
}