fix: GC has no grace period (TOCTOU with dedup uploads) (#86)
Fixes #71 ## Why `FindOrphanedBlobs` returned any blob not currently referenced. Because CAS dedups (the blob row can exist before its artifact/local_files row is written), a concurrent upload reusing an existing hash could have its S3 object deleted mid-flight by the GC. ## Changes - `FindOrphanedBlobs` now takes a `minAge` and only returns blobs with `created_at < now()-minAge`. - The collector passes a 1h `blobGracePeriod`. ## Validation - `go test ./internal/gc/...` and `make e2e` pass. --------- Co-authored-by: BenVincent <benvin@main.unkin.net> Reviewed-on: #86 Co-authored-by: Ben Vincent <ben@unkin.net> Co-committed-by: Ben Vincent <ben@unkin.net>
This commit was merged in pull request #86.
This commit is contained in:
@@ -138,16 +138,22 @@ func (db *DB) InsertAccessLogBatch(ctx context.Context, entries []AccessLogEntry
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (db *DB) FindOrphanedBlobs(ctx context.Context) ([]models.Blob, error) {
|
// FindOrphanedBlobs returns blobs no longer referenced by any artifact or
|
||||||
|
// local file, restricted to those created before now()-minAge. The age cutoff
|
||||||
|
// is a grace period that avoids a TOCTOU race with in-flight dedup uploads,
|
||||||
|
// which insert the blob row before the referencing artifact/local_files row.
|
||||||
|
func (db *DB) FindOrphanedBlobs(ctx context.Context, minAge time.Duration) ([]models.Blob, error) {
|
||||||
|
cutoff := time.Now().Add(-minAge)
|
||||||
rows, err := db.Pool.Query(ctx, `
|
rows, err := db.Pool.Query(ctx, `
|
||||||
SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at
|
SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at
|
||||||
FROM blobs b
|
FROM blobs b
|
||||||
WHERE b.content_hash NOT IN (
|
WHERE b.created_at < $1
|
||||||
|
AND b.content_hash NOT IN (
|
||||||
SELECT content_hash FROM artifacts
|
SELECT content_hash FROM artifacts
|
||||||
UNION
|
UNION
|
||||||
SELECT content_hash FROM local_files
|
SELECT content_hash FROM local_files
|
||||||
)
|
)
|
||||||
`)
|
`, cutoff)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
+6
-1
@@ -9,6 +9,11 @@ import (
|
|||||||
"git.unkin.net/unkin/artifactapi/internal/storage"
|
"git.unkin.net/unkin/artifactapi/internal/storage"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// blobGracePeriod is how old an orphaned blob must be before GC will delete
|
||||||
|
// it. This avoids racing in-flight dedup uploads that insert the blob row
|
||||||
|
// before the referencing artifact/local_files row exists.
|
||||||
|
const blobGracePeriod = 1 * time.Hour
|
||||||
|
|
||||||
type Collector struct {
|
type Collector struct {
|
||||||
db *database.DB
|
db *database.DB
|
||||||
store *storage.S3
|
store *storage.S3
|
||||||
@@ -38,7 +43,7 @@ func (c *Collector) Run(ctx context.Context) {
|
|||||||
func (c *Collector) sweep(ctx context.Context) {
|
func (c *Collector) sweep(ctx context.Context) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
orphaned, err := c.db.FindOrphanedBlobs(ctx)
|
orphaned, err := c.db.FindOrphanedBlobs(ctx, blobGracePeriod)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Error("gc: find orphaned blobs", "error", err)
|
slog.Error("gc: find orphaned blobs", "error", err)
|
||||||
return
|
return
|
||||||
|
|||||||
Reference in New Issue
Block a user