fix: GC has no grace period (TOCTOU with dedup uploads) (#86)
Fixes #71 ## Why `FindOrphanedBlobs` returned any blob not currently referenced. Because CAS dedups (the blob row can exist before its artifact/local_files row is written), a concurrent upload reusing an existing hash could have its S3 object deleted mid-flight by the GC. ## Changes - `FindOrphanedBlobs` now takes a `minAge` and only returns blobs with `created_at < now()-minAge`. - The collector passes a 1h `blobGracePeriod`. ## Validation - `go test ./internal/gc/...` and `make e2e` pass. --------- Co-authored-by: BenVincent <benvin@main.unkin.net> Reviewed-on: #86 Co-authored-by: Ben Vincent <ben@unkin.net> Co-committed-by: Ben Vincent <ben@unkin.net>
This commit was merged in pull request #86.
This commit is contained in:
+6
-1
@@ -9,6 +9,11 @@ import (
|
||||
"git.unkin.net/unkin/artifactapi/internal/storage"
|
||||
)
|
||||
|
||||
// blobGracePeriod is how old an orphaned blob must be before GC will delete
|
||||
// it. This avoids racing in-flight dedup uploads that insert the blob row
|
||||
// before the referencing artifact/local_files row exists.
|
||||
const blobGracePeriod = 1 * time.Hour
|
||||
|
||||
type Collector struct {
|
||||
db *database.DB
|
||||
store *storage.S3
|
||||
@@ -38,7 +43,7 @@ func (c *Collector) Run(ctx context.Context) {
|
||||
func (c *Collector) sweep(ctx context.Context) {
|
||||
start := time.Now()
|
||||
|
||||
orphaned, err := c.db.FindOrphanedBlobs(ctx)
|
||||
orphaned, err := c.db.FindOrphanedBlobs(ctx, blobGracePeriod)
|
||||
if err != nil {
|
||||
slog.Error("gc: find orphaned blobs", "error", err)
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user