Compare commits

...

2 Commits

Author SHA1 Message Date
unkinben 1b585af14e feat: wire the circuit breaker into the proxy fetch path (#90)
Fixes #74

## Why
`internal/proxy/circuit.go` implemented and tested a circuit breaker, but nothing ever called it — a repeatedly-failing upstream was still hit on every request.

## Changes
- Construct a `CircuitBreaker` in `NewEngine`.
- In `Engine.Fetch`: short-circuit when the breaker is open (serve stale from the store if present, otherwise return 503), `RecordFailure` on each `UpstreamError`, and `RecordSuccess` on a successful fetch.

## Validation
- `go test ./internal/proxy/` and `make e2e` pass.

---------

Co-authored-by: BenVincent <benvin@main.unkin.net>
Reviewed-on: #90
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-07-02 22:43:22 +10:00
unkinben e7c9387bcc fix: GC has no grace period (TOCTOU with dedup uploads) (#86)
Fixes #71

## Why
`FindOrphanedBlobs` returned any blob not currently referenced. Because CAS dedups (the blob row can exist before its artifact/local_files row is written), a concurrent upload reusing an existing hash could have its S3 object deleted mid-flight by the GC.

## Changes
- `FindOrphanedBlobs` now takes a `minAge` and only returns blobs with `created_at < now()-minAge`.
- The collector passes a 1h `blobGracePeriod`.

## Validation
- `go test ./internal/gc/...` and `make e2e` pass.

---------

Co-authored-by: BenVincent <benvin@main.unkin.net>
Reviewed-on: #86
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-07-02 22:43:18 +10:00
3 changed files with 34 additions and 4 deletions
+9 -3
View File
@@ -138,16 +138,22 @@ func (db *DB) InsertAccessLogBatch(ctx context.Context, entries []AccessLogEntry
return err
}
func (db *DB) FindOrphanedBlobs(ctx context.Context) ([]models.Blob, error) {
// FindOrphanedBlobs returns blobs no longer referenced by any artifact or
// local file, restricted to those created before now()-minAge. The age cutoff
// is a grace period that avoids a TOCTOU race with in-flight dedup uploads,
// which insert the blob row before the referencing artifact/local_files row.
func (db *DB) FindOrphanedBlobs(ctx context.Context, minAge time.Duration) ([]models.Blob, error) {
cutoff := time.Now().Add(-minAge)
rows, err := db.Pool.Query(ctx, `
SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at
FROM blobs b
WHERE b.content_hash NOT IN (
WHERE b.created_at < $1
AND b.content_hash NOT IN (
SELECT content_hash FROM artifacts
UNION
SELECT content_hash FROM local_files
)
`)
`, cutoff)
if err != nil {
return nil, err
}
+6 -1
View File
@@ -9,6 +9,11 @@ import (
"git.unkin.net/unkin/artifactapi/internal/storage"
)
// blobGracePeriod is how old an orphaned blob must be before GC will delete
// it. This avoids racing in-flight dedup uploads that insert the blob row
// before the referencing artifact/local_files row exists.
const blobGracePeriod = 1 * time.Hour
type Collector struct {
db *database.DB
store *storage.S3
@@ -38,7 +43,7 @@ func (c *Collector) Run(ctx context.Context) {
func (c *Collector) sweep(ctx context.Context) {
start := time.Now()
orphaned, err := c.db.FindOrphanedBlobs(ctx)
orphaned, err := c.db.FindOrphanedBlobs(ctx, blobGracePeriod)
if err != nil {
slog.Error("gc: find orphaned blobs", "error", err)
return
+19
View File
@@ -33,6 +33,7 @@ type Engine struct {
cache *cache.Redis
store *storage.S3
cas *storage.CAS
circuit *CircuitBreaker
accessLog chan database.AccessLogEntry
}
@@ -42,6 +43,7 @@ func NewEngine(db *database.DB, c *cache.Redis, s *storage.S3) *Engine {
cache: c,
store: s,
cas: storage.NewCAS(s),
circuit: NewCircuitBreaker(c),
accessLog: make(chan database.AccessLogEntry, accessLogBufferSize),
}
go e.runAccessLogWriter()
@@ -156,10 +158,26 @@ func (e *Engine) Fetch(ctx context.Context, remote models.Remote, path string, p
fwdHeaders = clientHeaders[0]
}
// Short-circuit upstream calls when the remote's breaker is open: serve
// stale from the store if we have it, otherwise fail fast rather than
// hammering a known-bad upstream.
if e.circuit.IsOpen(ctx, remote.Name) {
if stale, serr := e.serveFromStore(ctx, remote, path); serr == nil {
slog.Warn("circuit open, serving stale", "remote", remote.Name, "path", path)
stale.Source = "cache"
e.logAccess(remote.Name, path, true, stale.Size, 0)
return stale, nil
}
return nil, &ProxyError{Status: http.StatusServiceUnavailable, Message: "upstream circuit open"}
}
start := time.Now()
result, err := e.fetchFromUpstream(ctx, remote, path, prov, class, ttl, fwdHeaders)
upstreamMS := int(time.Since(start).Milliseconds())
if err != nil {
if isNetworkError(err) {
e.circuit.RecordFailure(ctx, remote.Name)
}
if remote.StaleOnError && isNetworkError(err) {
_ = e.cache.SetTTL(ctx, remote.Name, path, ttl)
stale, serr := e.serveFromStore(ctx, remote, path)
@@ -173,6 +191,7 @@ func (e *Engine) Fetch(ctx context.Context, remote models.Remote, path string, p
return nil, err
}
e.circuit.RecordSuccess(ctx, remote.Name)
e.logAccess(remote.Name, path, false, result.Size, upstreamMS)
return result, nil
}