Compare commits

..

2 Commits

Author SHA1 Message Date
unkinben 8fc1635d11 fix: add a grace period before GC deletes orphaned blobs
ci/woodpecker/pr/pre-commit Pipeline was successful
ci/woodpecker/pr/test Pipeline was successful
ci/woodpecker/pr/build Pipeline was successful
FindOrphanedBlobs returned any unreferenced blob, so a concurrent dedup
upload (which inserts the blob row before its artifact/local_files row)
could have its S3 object deleted mid-flight. Restrict collection to blobs
older than a 1h grace period.

Refs #71
2026-07-02 00:30:11 +10:00
unkinben 8d9bc1c422 feat: add bandwidth saved stat to dashboard (#65)
ci/woodpecker/tag/docker Pipeline was successful
Shows total bytes served from cache (instead of upstream) over the last 30 days. Queries `SUM(size_bytes) WHERE cache_hit = TRUE` from access_log.

Reviewed-on: #65
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-06-27 22:18:02 +10:00
4 changed files with 29 additions and 4 deletions
+9 -3
View File
@@ -109,16 +109,22 @@ func (db *DB) InsertAccessLog(ctx context.Context, remoteName, path string, cach
return err return err
} }
func (db *DB) FindOrphanedBlobs(ctx context.Context) ([]models.Blob, error) { // FindOrphanedBlobs returns blobs no longer referenced by any artifact or
// local file, restricted to those created before now()-minAge. The age cutoff
// is a grace period that avoids a TOCTOU race with in-flight dedup uploads,
// which insert the blob row before the referencing artifact/local_files row.
func (db *DB) FindOrphanedBlobs(ctx context.Context, minAge time.Duration) ([]models.Blob, error) {
cutoff := time.Now().Add(-minAge)
rows, err := db.Pool.Query(ctx, ` rows, err := db.Pool.Query(ctx, `
SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at
FROM blobs b FROM blobs b
WHERE b.content_hash NOT IN ( WHERE b.created_at < $1
AND b.content_hash NOT IN (
SELECT content_hash FROM artifacts SELECT content_hash FROM artifacts
UNION UNION
SELECT content_hash FROM local_files SELECT content_hash FROM local_files
) )
`) `, cutoff)
if err != nil { if err != nil {
return nil, err return nil, err
} }
+9
View File
@@ -30,6 +30,15 @@ func (db *DB) GetOverviewStats(ctx context.Context) (*models.OverviewStats, erro
return nil, err return nil, err
} }
err = db.Pool.QueryRow(ctx, `
SELECT COALESCE(SUM(size_bytes), 0)
FROM access_log
WHERE cache_hit = TRUE AND created_at > NOW() - INTERVAL '30 days'
`).Scan(&stats.BandwidthSaved30d)
if err != nil {
return nil, err
}
return &stats, nil return &stats, nil
} }
+6 -1
View File
@@ -9,6 +9,11 @@ import (
"git.unkin.net/unkin/artifactapi/internal/storage" "git.unkin.net/unkin/artifactapi/internal/storage"
) )
// blobGracePeriod is how old an orphaned blob must be before GC will delete
// it. This avoids racing in-flight dedup uploads that insert the blob row
// before the referencing artifact/local_files row exists.
const blobGracePeriod = 1 * time.Hour
type Collector struct { type Collector struct {
db *database.DB db *database.DB
store *storage.S3 store *storage.S3
@@ -38,7 +43,7 @@ func (c *Collector) Run(ctx context.Context) {
func (c *Collector) sweep(ctx context.Context) { func (c *Collector) sweep(ctx context.Context) {
start := time.Now() start := time.Now()
orphaned, err := c.db.FindOrphanedBlobs(ctx) orphaned, err := c.db.FindOrphanedBlobs(ctx, blobGracePeriod)
if err != nil { if err != nil {
slog.Error("gc: find orphaned blobs", "error", err) slog.Error("gc: find orphaned blobs", "error", err)
return return
+5
View File
@@ -50,6 +50,11 @@ export function Dashboard() {
value={formatNumber(stats.total_blobs_deduped)} value={formatNumber(stats.total_blobs_deduped)}
sub="shared blobs" sub="shared blobs"
/> />
<StatsCard
label="Bandwidth Saved"
value={formatBytes(stats.bandwidth_saved_30d)}
sub="last 30 days"
/>
</div> </div>
{health && ( {health && (