Compare commits

..

1 Commits

Author SHA1 Message Date
unkinben b698d1bdc0 perf: memoise regex compilation in the classifier
ci/woodpecker/pr/test Pipeline was successful
ci/woodpecker/pr/pre-commit Pipeline was successful
ci/woodpecker/pr/build Pipeline was successful
Classify runs on every proxied request and recompiled each remote's
pattern lists every time. Cache compiled regexes keyed by pattern text so
each distinct pattern compiles once and is reused thereafter.

Refs #73
2026-07-02 00:33:46 +10:00
3 changed files with 25 additions and 16 deletions
+3 -9
View File
@@ -109,22 +109,16 @@ func (db *DB) InsertAccessLog(ctx context.Context, remoteName, path string, cach
return err
}
// FindOrphanedBlobs returns blobs no longer referenced by any artifact or
// local file, restricted to those created before now()-minAge. The age cutoff
// is a grace period that avoids a TOCTOU race with in-flight dedup uploads,
// which insert the blob row before the referencing artifact/local_files row.
func (db *DB) FindOrphanedBlobs(ctx context.Context, minAge time.Duration) ([]models.Blob, error) {
cutoff := time.Now().Add(-minAge)
func (db *DB) FindOrphanedBlobs(ctx context.Context) ([]models.Blob, error) {
rows, err := db.Pool.Query(ctx, `
SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at
FROM blobs b
WHERE b.created_at < $1
AND b.content_hash NOT IN (
WHERE b.content_hash NOT IN (
SELECT content_hash FROM artifacts
UNION
SELECT content_hash FROM local_files
)
`, cutoff)
`)
if err != nil {
return nil, err
}
+1 -6
View File
@@ -9,11 +9,6 @@ import (
"git.unkin.net/unkin/artifactapi/internal/storage"
)
// blobGracePeriod is how old an orphaned blob must be before GC will delete
// it. This avoids racing in-flight dedup uploads that insert the blob row
// before the referencing artifact/local_files row exists.
const blobGracePeriod = 1 * time.Hour
type Collector struct {
db *database.DB
store *storage.S3
@@ -43,7 +38,7 @@ func (c *Collector) Run(ctx context.Context) {
func (c *Collector) sweep(ctx context.Context) {
start := time.Now()
orphaned, err := c.db.FindOrphanedBlobs(ctx, blobGracePeriod)
orphaned, err := c.db.FindOrphanedBlobs(ctx)
if err != nil {
slog.Error("gc: find orphaned blobs", "error", err)
return
+21 -1
View File
@@ -2,6 +2,7 @@ package proxy
import (
"regexp"
"sync"
"git.unkin.net/unkin/artifactapi/internal/provider"
"git.unkin.net/unkin/artifactapi/pkg/models"
@@ -60,10 +61,29 @@ func (c *Classifier) Classify(remote models.Remote, path string) Classification
return ClassImmutable
}
// patternCache memoises regex compilation. Classify runs on every proxied
// request and previously recompiled each remote's pattern lists every time;
// keying by the pattern string lets each distinct pattern compile once and
// then be reused, with no invalidation needed (the pattern text is the key).
// A pattern that fails to compile is cached as a typed nil so we don't retry.
var patternCache sync.Map // map[string]*regexp.Regexp
func compileCached(pattern string) *regexp.Regexp {
if v, ok := patternCache.Load(pattern); ok {
return v.(*regexp.Regexp)
}
re, err := regexp.Compile(pattern)
if err != nil {
re = nil
}
patternCache.Store(pattern, re)
return re
}
func compilePatterns(patterns []string) []*regexp.Regexp {
compiled := make([]*regexp.Regexp, 0, len(patterns))
for _, p := range patterns {
if re, err := regexp.Compile(p); err == nil {
if re := compileCached(p); re != nil {
compiled = append(compiled, re)
}
}