Compare commits

...

3 Commits

Author SHA1 Message Date
unkinben 8fc1635d11 fix: add a grace period before GC deletes orphaned blobs
ci/woodpecker/pr/pre-commit Pipeline was successful
ci/woodpecker/pr/test Pipeline was successful
ci/woodpecker/pr/build Pipeline was successful
FindOrphanedBlobs returned any unreferenced blob, so a concurrent dedup
upload (which inserts the blob row before its artifact/local_files row)
could have its S3 object deleted mid-flight. Restrict collection to blobs
older than a 1h grace period.

Refs #71
2026-07-02 00:30:11 +10:00
unkinben 8d9bc1c422 feat: add bandwidth saved stat to dashboard (#65)
ci/woodpecker/tag/docker Pipeline was successful
Shows total bytes served from cache (instead of upstream) over the last 30 days. Queries `SUM(size_bytes) WHERE cache_hit = TRUE` from access_log.

Reviewed-on: #65
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-06-27 22:18:02 +10:00
unkinben 30b7cef026 fix: strip base URL path prefix from helm chart download URLs (#64)
ci/woodpecker/tag/docker Pipeline was successful
When a helm repo base URL includes a path component (e.g. \`stakater.github.io/stakater-charts\`), the merger was extracting the full URL path (\`stakater-charts/reloader-2.2.8.tgz\`) and the proxy then constructed \`base_url/stakater-charts/reloader-2.2.8.tgz\` = double path = 404.

Fix: \`extractPathRelativeToBase()\` strips the shared base path prefix so only the filename portion is used as the proxy path.
Reviewed-on: #64
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-06-27 08:02:52 +10:00
5 changed files with 43 additions and 5 deletions
+9 -3
View File
@@ -109,16 +109,22 @@ func (db *DB) InsertAccessLog(ctx context.Context, remoteName, path string, cach
return err
}
func (db *DB) FindOrphanedBlobs(ctx context.Context) ([]models.Blob, error) {
// FindOrphanedBlobs returns blobs no longer referenced by any artifact or
// local file, restricted to those created before now()-minAge. The age cutoff
// is a grace period that avoids a TOCTOU race with in-flight dedup uploads,
// which insert the blob row before the referencing artifact/local_files row.
func (db *DB) FindOrphanedBlobs(ctx context.Context, minAge time.Duration) ([]models.Blob, error) {
cutoff := time.Now().Add(-minAge)
rows, err := db.Pool.Query(ctx, `
SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at
FROM blobs b
WHERE b.content_hash NOT IN (
WHERE b.created_at < $1
AND b.content_hash NOT IN (
SELECT content_hash FROM artifacts
UNION
SELECT content_hash FROM local_files
)
`)
`, cutoff)
if err != nil {
return nil, err
}
+9
View File
@@ -30,6 +30,15 @@ func (db *DB) GetOverviewStats(ctx context.Context) (*models.OverviewStats, erro
return nil, err
}
err = db.Pool.QueryRow(ctx, `
SELECT COALESCE(SUM(size_bytes), 0)
FROM access_log
WHERE cache_hit = TRUE AND created_at > NOW() - INTERVAL '30 days'
`).Scan(&stats.BandwidthSaved30d)
if err != nil {
return nil, err
}
return &stats, nil
}
+6 -1
View File
@@ -9,6 +9,11 @@ import (
"git.unkin.net/unkin/artifactapi/internal/storage"
)
// blobGracePeriod is how old an orphaned blob must be before GC will delete
// it. This avoids racing in-flight dedup uploads that insert the blob row
// before the referencing artifact/local_files row exists.
const blobGracePeriod = 1 * time.Hour
type Collector struct {
db *database.DB
store *storage.S3
@@ -38,7 +43,7 @@ func (c *Collector) Run(ctx context.Context) {
func (c *Collector) sweep(ctx context.Context) {
start := time.Now()
orphaned, err := c.db.FindOrphanedBlobs(ctx)
orphaned, err := c.db.FindOrphanedBlobs(ctx, blobGracePeriod)
if err != nil {
slog.Error("gc: find orphaned blobs", "error", err)
return
+14 -1
View File
@@ -65,11 +65,12 @@ func (m *HelmMerger) MergeIndexes(members []MemberIndex, proxyBaseURL string) ([
if baseHost != "" && extractHost(u) != baseHost {
continue
}
relPath := extractPathRelativeToBase(u, member.BaseURL)
ver.URLs[i] = fmt.Sprintf("%s/api/v1/%s/%s/%s",
strings.TrimRight(proxyBaseURL, "/"),
routePrefix,
member.RemoteName,
extractPath(u))
relPath)
} else {
ver.URLs[i] = fmt.Sprintf("%s/api/v1/%s/%s/%s",
strings.TrimRight(proxyBaseURL, "/"),
@@ -102,6 +103,18 @@ func extractHost(rawURL string) string {
return rest[:slashIdx]
}
func extractPathRelativeToBase(rawURL, baseURL string) string {
fullPath := extractPath(rawURL)
basePath := extractPath(baseURL)
if basePath != "" {
basePath = strings.TrimRight(basePath, "/") + "/"
if strings.HasPrefix(fullPath, basePath) {
return fullPath[len(basePath):]
}
}
return fullPath
}
func extractPath(rawURL string) string {
idx := strings.Index(rawURL, "://")
if idx == -1 {
+5
View File
@@ -50,6 +50,11 @@ export function Dashboard() {
value={formatNumber(stats.total_blobs_deduped)}
sub="shared blobs"
/>
<StatsCard
label="Bandwidth Saved"
value={formatBytes(stats.bandwidth_saved_30d)}
sub="last 30 days"
/>
</div>
{health && (