Feat/v3 go rewrite (#47)
ci/woodpecker/tag/docker Pipeline was successful

Complete rewrite of ArtifactAPI from Python/FastAPI to Go as a single binary.

Core engine:
- 10 package providers: generic, docker, helm, pypi, npm, rpm, alpine,
  puppet, terraform, goproxy — each with built-in mutable patterns
- Content-addressable storage (SHA256 dedup across all remotes)
- Three-tier caching: Redis (TTL/locks) → S3/MinIO (blobs) → upstream
- Classifier with per-remote allowlist/blocklist (an empty allowlist allows all paths)
- Circuit breaker, conditional revalidation, stale-on-error
- Background garbage collection for orphaned blobs
- Access logging to PostgreSQL

API:
- v1 proxy endpoints (backwards compatible)
- v2 management API: CRUD remotes/virtuals, object browser, stats,
  health, SSE events, probe/test endpoint
- Virtual repos with index merging (Helm YAML + PyPI HTML)

Frontend (React + Vite, separate Dockerfile):
- Dashboard with stats, health indicators, top remotes
- Remotes list with type filter, remote detail with config/patterns
- Object browser with pagination and evict
- Test Remote page: probe any remote path, see headers/size/timing
- Virtuals page with expandable member lists

TUI (Bubble Tea):
- Dashboard, remotes list/detail, object browser, virtuals
- Vim-style navigation; launched with `artifactapi tui --endpoint <url>`

Infrastructure:
- S3 client supports MinIO, Ceph RGW, AWS S3 (minio-go)
- PostgreSQL schema with migrations
- Docker Compose: API + UI + Postgres 17 + Redis 7 + MinIO
- Makefile with Go version check, build/test/lint/fmt/e2e targets
- Distroless Docker image (~15MB)

Testing:
- Unit tests for models, classifier, providers, mergers
- E2E tests with testcontainers-go (real Postgres/Redis/MinIO)

Terraform config:
- All 40 production remotes + helm virtual as HCL
- Provider repo: terraform-provider-artifactapi v0.0.1 (separate)

---------

Co-authored-by: Ben Vincent <ben@unkin.net>
Reviewed-on: #47
This commit was merged in pull request #47.
This commit is contained in:
2026-06-07 19:30:35 +10:00
parent f25bf6cb29
commit b46c116f6b
160 changed files with 11448 additions and 7907 deletions
+60
View File
@@ -0,0 +1,60 @@
package proxy
import (
"context"
"time"
"git.unkin.net/unkin/artifactapi/internal/cache"
"git.unkin.net/unkin/artifactapi/pkg/models"
)
// Default circuit-breaker settings applied by NewCircuitBreaker.
const (
// defaultCircuitThreshold is the failure count at which IsOpen reports the
// circuit as open and Health reports the remote as "down".
defaultCircuitThreshold = 5
// defaultCircuitCooldown is the duration handed to the cache whenever a
// failure is recorded.
defaultCircuitCooldown = 60 * time.Second
)
// CircuitBreaker reports per-remote upstream health based on failure
// counters kept in the Redis cache.
type CircuitBreaker struct {
// cache is the Redis client that stores the failure counters.
cache *cache.Redis
// threshold is the failure count at or above which the circuit is open.
threshold int64
// cooldown is passed to the cache each time a failure is recorded.
cooldown time.Duration
}
// NewCircuitBreaker returns a CircuitBreaker backed by c that uses the
// package default threshold and cooldown.
func NewCircuitBreaker(c *cache.Redis) *CircuitBreaker {
	breaker := new(CircuitBreaker)
	breaker.cache = c
	breaker.threshold = defaultCircuitThreshold
	breaker.cooldown = defaultCircuitCooldown
	return breaker
}
// IsOpen reports whether the recorded failure count for remote has reached
// the breaker's threshold. If the counter cannot be read the circuit is
// reported as closed, so a cache outage never blocks upstream traffic.
func (cb *CircuitBreaker) IsOpen(ctx context.Context, remote string) bool {
	count, err := cb.cache.GetCircuitFailures(ctx, remote)
	return err == nil && count >= cb.threshold
}
// RecordFailure registers one upstream failure for remote by calling the
// cache's IncrCircuitFailure with the breaker's cooldown. Any value returned
// by the cache call is discarded.
// NOTE(review): the cooldown is presumably used as the counter's expiry —
// confirm against cache.Redis.
func (cb *CircuitBreaker) RecordFailure(ctx context.Context, remote string) {
cb.cache.IncrCircuitFailure(ctx, remote, cb.cooldown)
}
// RecordSuccess calls the cache's ResetCircuit for remote. Any value returned
// by the cache call is discarded.
// NOTE(review): presumably this clears the failure counter — confirm against
// cache.Redis.
func (cb *CircuitBreaker) RecordSuccess(ctx context.Context, remote string) {
cb.cache.ResetCircuit(ctx, remote)
}
// Health summarises the failure counter for remote as a RemoteHealth value.
//
// The status is "unknown" when the counter cannot be read, "healthy" when
// it is zero, "degraded" while it is below the threshold and "down"
// otherwise. ConsecutiveFailures carries the counter for every status except
// "unknown".
func (cb *CircuitBreaker) Health(ctx context.Context, remote string) models.RemoteHealth {
	count, err := cb.cache.GetCircuitFailures(ctx, remote)
	if err != nil {
		return models.RemoteHealth{Status: "unknown"}
	}

	health := models.RemoteHealth{ConsecutiveFailures: int(count)}
	if count == 0 {
		health.Status = "healthy"
	} else if count < cb.threshold {
		health.Status = "degraded"
	} else {
		health.Status = "down"
	}
	return health
}
+14
View File
@@ -0,0 +1,14 @@
package proxy_test
import (
"testing"
"git.unkin.net/unkin/artifactapi/internal/proxy"
)
// TestCircuitBreaker_New checks that the constructor yields a usable value
// even when no cache is supplied.
func TestCircuitBreaker_New(t *testing.T) {
	if breaker := proxy.NewCircuitBreaker(nil); breaker == nil {
		t.Fatal("expected non-nil circuit breaker")
	}
}
+80
View File
@@ -0,0 +1,80 @@
package proxy
import (
"regexp"
"git.unkin.net/unkin/artifactapi/internal/provider"
"git.unkin.net/unkin/artifactapi/pkg/models"
)
// Classification is the outcome of classifying a request path for a remote.
type Classification int

// Possible classification outcomes.
const (
	// ClassImmutable marks a path whose content is treated as never changing.
	ClassImmutable Classification = iota
	// ClassMutable marks a path whose content may change upstream.
	ClassMutable
	// ClassDenied marks a path that must not be served.
	ClassDenied
)

// String returns the lowercase name of the classification, or "unknown" for
// a value outside the declared constants.
func (c Classification) String() string {
	names := [...]string{
		ClassImmutable: "immutable",
		ClassMutable:   "mutable",
		ClassDenied:    "denied",
	}
	if c < 0 || int(c) >= len(names) {
		return "unknown"
	}
	return names[c]
}
// Classifier decides how a request path should be treated for a remote,
// combining the remote's own pattern lists with a provider's defaults.
type Classifier struct {
	// provider supplies the fallback mutable/immutable decision.
	provider provider.Provider
}

// NewClassifier returns a Classifier that falls back to p when none of a
// remote's pattern lists decide the outcome.
func NewClassifier(p provider.Provider) *Classifier {
	classifier := new(Classifier)
	classifier.provider = p
	return classifier
}
// Classify returns the classification of path for remote. Rules are applied
// in order and the first one that matches wins:
//
//  1. a Blocklist match denies the path;
//  2. a non-empty Patterns list acts as an allowlist, denying non-matches;
//  3. an ImmutablePatterns match forces ClassImmutable;
//  4. a MutablePatterns match forces ClassMutable;
//  5. otherwise the provider decides, with ClassImmutable as the default.
//
// Each pattern list is compiled on every call, and expressions that fail to
// compile are skipped, so an invalid blocklist entry never denies anything.
func (c *Classifier) Classify(remote models.Remote, path string) Classification {
	hit := func(exprs []string) bool {
		return matchesAny(path, compilePatterns(exprs))
	}

	switch {
	case hit(remote.Blocklist):
		return ClassDenied
	case len(remote.Patterns) > 0 && !hit(remote.Patterns):
		return ClassDenied
	case hit(remote.ImmutablePatterns):
		return ClassImmutable
	case hit(remote.MutablePatterns):
		return ClassMutable
	case c.provider.Classify(path) == provider.Mutable:
		return ClassMutable
	}
	return ClassImmutable
}
// compilePatterns compiles each expression in patterns and returns the ones
// that are valid regular expressions, in their original order. Expressions
// that fail to compile are dropped without reporting an error. The result is
// never nil.
func compilePatterns(patterns []string) []*regexp.Regexp {
	valid := make([]*regexp.Regexp, 0, len(patterns))
	for i := range patterns {
		re, err := regexp.Compile(patterns[i])
		if err != nil {
			continue
		}
		valid = append(valid, re)
	}
	return valid
}
// matchesAny reports whether path matches at least one of patterns. An empty
// or nil list never matches.
func matchesAny(path string, patterns []*regexp.Regexp) bool {
	for i := 0; i < len(patterns); i++ {
		if patterns[i].MatchString(path) {
			return true
		}
	}
	return false
}
+129
View File
@@ -0,0 +1,129 @@
package proxy_test
import (
"testing"
"git.unkin.net/unkin/artifactapi/internal/provider/docker"
"git.unkin.net/unkin/artifactapi/internal/provider/generic"
"git.unkin.net/unkin/artifactapi/internal/provider/helm"
"git.unkin.net/unkin/artifactapi/internal/provider/rpm"
"git.unkin.net/unkin/artifactapi/internal/proxy"
"git.unkin.net/unkin/artifactapi/pkg/models"
)
// TestClassifier_EmptyPatternsAllowsAll verifies that a remote without any
// pattern lists does not deny paths.
func TestClassifier_EmptyPatternsAllowsAll(t *testing.T) {
	classifier := proxy.NewClassifier(&generic.Provider{})
	got := classifier.Classify(models.Remote{Name: "test"}, "any/path")
	if got == proxy.ClassDenied {
		t.Error("empty patterns should allow all paths")
	}
}
// TestClassifier_PatternsActAsAllowlist verifies that a non-empty Patterns
// list admits matching paths and denies everything else.
func TestClassifier_PatternsActAsAllowlist(t *testing.T) {
	classifier := proxy.NewClassifier(&generic.Provider{})
	remote := models.Remote{Name: "test", Patterns: []string{`^releases/`}}

	inside := classifier.Classify(remote, "releases/v1.0/app.tar.gz")
	if inside == proxy.ClassDenied {
		t.Error("path matching patterns should be allowed")
	}

	outside := classifier.Classify(remote, "uploads/other.tar.gz")
	if outside != proxy.ClassDenied {
		t.Error("path not matching patterns should be denied")
	}
}
// TestClassifier_BlocklistDenies verifies that blocklisted paths are denied
// while other paths remain allowed.
func TestClassifier_BlocklistDenies(t *testing.T) {
	classifier := proxy.NewClassifier(&generic.Provider{})
	remote := models.Remote{Name: "test", Blocklist: []string{`\.exe$`}}

	blocked := classifier.Classify(remote, "malware.exe")
	if blocked != proxy.ClassDenied {
		t.Error("blocklist match should deny")
	}

	permitted := classifier.Classify(remote, "legit.tar.gz")
	if permitted == proxy.ClassDenied {
		t.Error("non-blocked path should be allowed")
	}
}
// TestClassifier_BlocklistBeforePatterns verifies that the blocklist wins
// when a path matches both the allowlist and the blocklist.
func TestClassifier_BlocklistBeforePatterns(t *testing.T) {
	classifier := proxy.NewClassifier(&generic.Provider{})
	remote := models.Remote{
		Name:      "test",
		Patterns:  []string{`^releases/`},
		Blocklist: []string{`releases/v0\.1/`},
	}

	got := classifier.Classify(remote, "releases/v0.1/app.tar.gz")
	if got != proxy.ClassDenied {
		t.Error("blocklist should take priority")
	}
}
// TestClassifier_GenericAllImmutable verifies the generic provider's default
// classification when the remote has no overrides.
func TestClassifier_GenericAllImmutable(t *testing.T) {
	classifier := proxy.NewClassifier(&generic.Provider{})
	got := classifier.Classify(models.Remote{Name: "test"}, "any/file.tar.gz")
	if got != proxy.ClassImmutable {
		t.Error("generic provider should classify everything as immutable")
	}
}
// TestClassifier_GenericMutableOverride verifies that MutablePatterns
// override the generic provider's default for matching paths only.
func TestClassifier_GenericMutableOverride(t *testing.T) {
	classifier := proxy.NewClassifier(&generic.Provider{})
	remote := models.Remote{
		Name:            "test",
		MutablePatterns: []string{`/archive/refs/heads/`},
	}

	cases := []struct {
		path string
		want proxy.Classification
		msg  string
	}{
		{"repo/archive/refs/heads/main.tar.gz", proxy.ClassMutable, "mutable_patterns should override provider default"},
		{"repo/releases/v1.0.tar.gz", proxy.ClassImmutable, "non-mutable path should stay immutable"},
	}
	for _, tc := range cases {
		if classifier.Classify(remote, tc.path) != tc.want {
			t.Error(tc.msg)
		}
	}
}
// TestClassifier_ImmutableOverride verifies that ImmutablePatterns take
// effect with the helm provider configured.
func TestClassifier_ImmutableOverride(t *testing.T) {
	classifier := proxy.NewClassifier(&helm.Provider{})
	remote := models.Remote{
		Name:              "test",
		ImmutablePatterns: []string{`special-index\.yaml$`},
	}

	got := classifier.Classify(remote, "special-index.yaml")
	if got != proxy.ClassImmutable {
		t.Error("immutable_patterns should force immutable even for normally mutable paths")
	}
}
// TestClassifier_HelmAutoClassifies verifies the helm provider's built-in
// classification of index and chart paths.
func TestClassifier_HelmAutoClassifies(t *testing.T) {
	classifier := proxy.NewClassifier(&helm.Provider{})
	remote := models.Remote{Name: "test"}

	cases := []struct {
		path string
		want proxy.Classification
		msg  string
	}{
		{"index.yaml", proxy.ClassMutable, "helm should auto-classify index.yaml as mutable"},
		{"chart-1.0.tgz", proxy.ClassImmutable, "helm should auto-classify .tgz as immutable"},
	}
	for _, tc := range cases {
		if classifier.Classify(remote, tc.path) != tc.want {
			t.Error(tc.msg)
		}
	}
}
// TestClassifier_DockerAutoClassifies verifies the docker provider's built-in
// classification of tag manifests, digest manifests and blobs.
func TestClassifier_DockerAutoClassifies(t *testing.T) {
	classifier := proxy.NewClassifier(&docker.Provider{})
	remote := models.Remote{Name: "test"}

	cases := []struct {
		path string
		want proxy.Classification
		msg  string
	}{
		{
			"library/nginx/manifests/latest",
			proxy.ClassMutable,
			"docker should classify tag manifest as mutable",
		},
		{
			"library/nginx/manifests/sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890",
			proxy.ClassImmutable,
			"docker should classify digest manifest as immutable",
		},
		{
			"library/nginx/blobs/sha256:abc",
			proxy.ClassImmutable,
			"docker should classify blobs as immutable",
		},
	}
	for _, tc := range cases {
		if classifier.Classify(remote, tc.path) != tc.want {
			t.Error(tc.msg)
		}
	}
}
// TestClassifier_RPMAutoClassifies verifies the rpm provider's built-in
// classification of repository metadata and package files.
func TestClassifier_RPMAutoClassifies(t *testing.T) {
	classifier := proxy.NewClassifier(&rpm.Provider{})
	remote := models.Remote{Name: "test"}

	cases := []struct {
		path string
		want proxy.Classification
		msg  string
	}{
		{"repodata/primary.xml.gz", proxy.ClassMutable, "rpm should classify repodata as mutable"},
		{"packages/foo-1.0.rpm", proxy.ClassImmutable, "rpm should classify .rpm as immutable"},
	}
	for _, tc := range cases {
		if classifier.Classify(remote, tc.path) != tc.want {
			t.Error(tc.msg)
		}
	}
}
+341
View File
@@ -0,0 +1,341 @@
package proxy
import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"time"

	"git.unkin.net/unkin/artifactapi/internal/cache"
	"git.unkin.net/unkin/artifactapi/internal/database"
	"git.unkin.net/unkin/artifactapi/internal/provider"
	"git.unkin.net/unkin/artifactapi/internal/storage"
	"git.unkin.net/unkin/artifactapi/pkg/models"
)
// fetchLockTTL is the lifetime passed to the cache when Fetch acquires the
// per-path fetch lock.
const fetchLockTTL = 30 * time.Second
// Engine serves artifacts for a remote, consulting Redis for freshness and
// locks, S3 for stored bodies, the database for artifact metadata, and the
// upstream remote when nothing usable is stored.
type Engine struct {
// db holds blob/artifact metadata and the access log.
db *database.DB
// cache holds TTL markers, ETags and fetch locks.
cache *cache.Redis
// store is the S3 client used to upload and download bodies.
store *storage.S3
// cas is built from the same S3 client by NewEngine; the methods visible
// in this file do not reference it.
cas *storage.CAS
}
// NewEngine wires an Engine from its database, cache and S3 dependencies and
// derives the content-addressable store from s.
func NewEngine(db *database.DB, c *cache.Redis, s *storage.S3) *Engine {
	engine := &Engine{db: db, cache: c, store: s}
	engine.cas = storage.NewCAS(s)
	return engine
}
// FetchResult is the outcome of a successful Engine.Fetch.
type FetchResult struct {
// Reader yields the artifact body.
Reader io.ReadCloser
// ContentType is the media type to report for the body.
ContentType string
// Size is the body length in bytes.
Size int64
Source string // "cache" or "remote"
}
// Fetch returns the artifact at path for remote, serving it from the store
// when possible and from upstream otherwise.
//
// The steps are:
//
//  1. Classify the path; denied paths yield a *ProxyError with status 403.
//  2. If the cache says the entry is fresh, serve it from the store.
//  3. Try to take the per-path fetch lock. If another fetcher holds it, wait
//     briefly (honouring ctx cancellation) and retry the store once.
//  4. For mutable paths with CheckMutable set and a known ETag, revalidate
//     upstream and serve the stored copy on 304.
//  5. Fetch from upstream. On a network-level failure with StaleOnError set,
//     fall back to the stored copy if one exists.
//
// Results served from the store have Source "cache"; results fetched from
// upstream have Source "remote". Every successful call records an access log
// entry asynchronously. If ctx is cancelled while waiting on the lock, Fetch
// returns ctx.Err().
func (e *Engine) Fetch(ctx context.Context, remote models.Remote, path string, prov provider.Provider) (*FetchResult, error) {
	// How long to wait for a concurrent fetcher before re-checking the store.
	const lockWait = 500 * time.Millisecond
	// Upper bound on the lock release call issued after the request ends.
	const releaseTimeout = 5 * time.Second

	class := NewClassifier(prov).Classify(remote, path)
	if class == ClassDenied {
		return nil, &ProxyError{Status: http.StatusForbidden, Message: "access denied"}
	}

	ttl := e.ttlFor(remote, class)

	// cacheHit marks a stored copy as a cache hit and logs the access.
	cacheHit := func(res *FetchResult) (*FetchResult, error) {
		res.Source = "cache"
		go e.logAccess(remote.Name, path, true, res.Size, 0)
		return res, nil
	}

	fresh, err := e.cache.CheckTTL(ctx, remote.Name, path)
	if err != nil {
		slog.Warn("redis check failed, treating as miss", "error", err)
		// Make the logged intent explicit regardless of what CheckTTL returned.
		fresh = false
	}
	if fresh {
		result, err := e.serveFromStore(ctx, remote, path)
		if err == nil {
			return cacheHit(result)
		}
		slog.Warn("cache hit but S3 miss, re-fetching", "remote", remote.Name, "path", path, "error", err)
	}

	locked, err := e.cache.AcquireLock(ctx, remote.Name, path, fetchLockTTL)
	if err != nil {
		slog.Warn("lock acquire failed", "error", err)
	}
	if locked {
		defer func() {
			// The request context may already be cancelled when Fetch
			// returns; release with a detached, bounded context so the lock
			// does not linger until fetchLockTTL expires.
			releaseCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), releaseTimeout)
			defer cancel()
			e.cache.ReleaseLock(releaseCtx, remote.Name, path)
		}()
	} else {
		// Only wait when the lock is genuinely held by someone else; if the
		// lock backend failed there is nobody to wait for.
		if err == nil {
			timer := time.NewTimer(lockWait)
			select {
			case <-ctx.Done():
				timer.Stop()
				return nil, ctx.Err()
			case <-timer.C:
			}
		}
		if result, err := e.serveFromStore(ctx, remote, path); err == nil {
			return cacheHit(result)
		}
		// Nothing stored yet: fall through and fetch without the lock.
	}

	if class == ClassMutable && remote.CheckMutable {
		// A missing or unreadable ETag simply skips revalidation.
		etag, _ := e.cache.GetETag(ctx, remote.Name, path)
		if etag != "" {
			notModified, err := e.checkUpstream(ctx, remote, path, etag, prov)
			if err == nil && notModified {
				// Cache writes are best-effort; a failure only costs a
				// revalidation on the next request.
				_ = e.cache.SetTTL(ctx, remote.Name, path, ttl)
				_ = e.cache.SetETag(ctx, remote.Name, path, etag, ttl)
				if result, err := e.serveFromStore(ctx, remote, path); err == nil {
					return cacheHit(result)
				}
			}
		}
	}

	start := time.Now()
	result, err := e.fetchFromUpstream(ctx, remote, path, prov, class, ttl)
	upstreamMS := int(time.Since(start).Milliseconds())
	if err != nil {
		if remote.StaleOnError && isNetworkError(err) {
			stale, serr := e.serveFromStore(ctx, remote, path)
			if serr == nil {
				// Extend the TTL only once a stale copy is known to exist,
				// so the cache never advertises an entry the store lacks.
				_ = e.cache.SetTTL(ctx, remote.Name, path, ttl)
				slog.Warn("serving stale on upstream error", "remote", remote.Name, "path", path, "error", err)
				return cacheHit(stale)
			}
		}
		return nil, err
	}

	go e.logAccess(remote.Name, path, false, result.Size, upstreamMS)
	return result, nil
}
// fetchFromUpstream downloads path from the remote's upstream, stores the
// body and returns it with Source "remote".
//
// The whole body is read into memory, optionally rewritten by the provider,
// and then stored: mutable paths go under the per-remote index key, all
// other paths under a content-addressed blob key (uploaded only when that
// key does not exist yet) with blob and artifact rows upserted in the
// database. Afterwards the TTL marker and, when upstream sent one, the ETag
// are written to the cache.
//
// Transport failures are returned as *UpstreamError, non-200 responses as
// *ProxyError carrying the upstream status. Database and cache write
// failures are logged or ignored and do not fail the fetch.
func (e *Engine) fetchFromUpstream(ctx context.Context, remote models.Remote, path string, prov provider.Provider, class Classification, ttl time.Duration) (*FetchResult, error) {
	target := prov.UpstreamURL(remote, path)

	headers, err := prov.AuthHeaders(ctx, remote)
	if err != nil {
		return nil, fmt.Errorf("auth headers: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	for name, values := range headers {
		for _, value := range values {
			req.Header.Add(name, value)
		}
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, &UpstreamError{Err: err}
	}
	if status := resp.StatusCode; status != http.StatusOK {
		resp.Body.Close()
		return nil, &ProxyError{Status: status, Message: fmt.Sprintf("upstream returned %d", status)}
	}

	body, err := io.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		return nil, fmt.Errorf("read upstream body: %w", err)
	}

	// Providers may rewrite the payload; a nil result means "leave as is".
	rewritten, err := prov.RewriteResponse(body, remote, "")
	if err != nil {
		return nil, fmt.Errorf("rewrite response: %w", err)
	}
	if rewritten != nil {
		body = rewritten
	}

	// Prefer the upstream media type only when the provider has nothing more
	// specific than the generic binary type.
	contentType := prov.ContentType(path)
	if contentType == "application/octet-stream" {
		if upstreamType := resp.Header.Get("Content-Type"); upstreamType != "" {
			contentType = upstreamType
		}
	}

	size := int64(len(body))
	etag := resp.Header.Get("ETag")

	switch class {
	case ClassMutable:
		key := storage.IndexKey(remote.Name, path)
		if err := e.store.Upload(ctx, key, bytesReader(body), size, contentType); err != nil {
			return nil, fmt.Errorf("upload index: %w", err)
		}
	default:
		digest := sha256Hash(body)
		key := storage.BlobKey(digest)
		if exists, _ := e.store.Exists(ctx, key); !exists {
			if err := e.store.Upload(ctx, key, bytesReader(body), size, contentType); err != nil {
				return nil, fmt.Errorf("upload blob: %w", err)
			}
		}
		contentHash := fmt.Sprintf("sha256:%s", digest)
		if err := e.db.UpsertBlob(ctx, contentHash, key, size, contentType); err != nil {
			slog.Warn("upsert blob failed", "error", err)
		}
		if err := e.db.UpsertArtifact(ctx, remote.Name, path, contentHash, etag); err != nil {
			slog.Warn("upsert artifact failed", "error", err)
		}
	}

	// Both storage paths finish by recording freshness and the ETag.
	_ = e.cache.SetTTL(ctx, remote.Name, path, ttl)
	if etag != "" {
		_ = e.cache.SetETag(ctx, remote.Name, path, etag, ttl)
	}

	return &FetchResult{
		Reader:      io.NopCloser(bytesReader(body)),
		ContentType: contentType,
		Size:        size,
		Source:      "remote",
	}, nil
}
// serveFromStore opens the stored copy of path for remote.
//
// When the database has an artifact row, its content hash (with any
// "sha256:" prefix removed) is tried first as a bare object key and then as
// a blob key; a successful download also touches the artifact's access
// record. If no artifact row exists, or neither key can be downloaded, the
// per-remote index key is tried. Source is left empty for the caller to set.
//
// It returns an error wrapping the last download failure when nothing could
// be opened. A malformed or empty content hash is tolerated rather than
// causing a slice-bounds panic.
func (e *Engine) serveFromStore(ctx context.Context, remote models.Remote, path string) (*FetchResult, error) {
	const hashPrefix = "sha256:"

	// download opens key in the object store and wraps it as a FetchResult.
	download := func(key string) (*FetchResult, error) {
		reader, info, err := e.store.Download(ctx, key)
		if err != nil {
			return nil, err
		}
		return &FetchResult{
			Reader:      reader,
			ContentType: info.ContentType,
			Size:        info.Size,
		}, nil
	}

	artifact, err := e.db.GetArtifact(ctx, remote.Name, path)
	if err == nil && artifact != nil {
		hash := artifact.ContentHash
		if len(hash) >= len(hashPrefix) && hash[:len(hashPrefix)] == hashPrefix {
			hash = hash[len(hashPrefix):]
		}

		// NOTE(review): the bare-hash attempt is kept from the original code;
		// uploads in this file use storage.BlobKey, so confirm whether the
		// bare key is still needed.
		result, err := download(hash)
		if err != nil {
			result, err = download(storage.BlobKey(hash))
		}
		if err == nil {
			// Access tracking is best-effort and must not fail the read.
			_ = e.db.TouchArtifactAccess(ctx, remote.Name, path)
			return result, nil
		}
	}

	result, err := download(storage.IndexKey(remote.Name, path))
	if err != nil {
		return nil, fmt.Errorf("not in store: %w", err)
	}
	return result, nil
}
// checkUpstream issues a conditional HEAD request for path with etag in
// If-None-Match and reports whether upstream answered 304 Not Modified.
// Transport failures are returned as *UpstreamError; any other status code
// yields false with a nil error.
func (e *Engine) checkUpstream(ctx context.Context, remote models.Remote, path, etag string, prov provider.Provider) (bool, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodHead, prov.UpstreamURL(remote, path), nil)
	if err != nil {
		return false, err
	}
	req.Header.Set("If-None-Match", etag)

	headers, err := prov.AuthHeaders(ctx, remote)
	if err != nil {
		return false, err
	}
	for name, values := range headers {
		for _, value := range values {
			req.Header.Add(name, value)
		}
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false, &UpstreamError{Err: err}
	}
	// Only the status line matters here, so the body is closed right away.
	resp.Body.Close()

	unchanged := resp.StatusCode == http.StatusNotModified
	return unchanged, nil
}
// ttlFor converts the remote's configured TTL, given in seconds, into a
// duration: ImmutableTTL for immutable paths and MutableTTL for every other
// classification. A configured value of zero yields a zero duration.
func (e *Engine) ttlFor(remote models.Remote, class Classification) time.Duration {
	if class == ClassImmutable {
		return time.Duration(remote.ImmutableTTL) * time.Second
	}
	return time.Duration(remote.MutableTTL) * time.Second
}
// logAccess writes one access-log row for a served request. It is meant to
// run in its own goroutine, so it uses a fresh background context bounded to
// five seconds instead of the request context. A failed insert is logged and
// otherwise ignored; access logging never affects the response.
func (e *Engine) logAccess(remoteName, path string, cacheHit bool, size int64, upstreamMS int) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	if err := e.db.InsertAccessLog(ctx, remoteName, path, cacheHit, size, upstreamMS, ""); err != nil {
		slog.Warn("access log insert failed", "remote", remoteName, "path", path, "error", err)
	}
}
// sha256Hash returns the SHA-256 digest of data as a lowercase hex string.
func sha256Hash(data []byte) string {
	hasher := sha256.New()
	hasher.Write(data) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// bytesReader returns a reader over data that starts at the first byte and
// ends after the last one. The slice is not copied.
func bytesReader(data []byte) io.Reader {
	src := readerAt(data)
	return io.NewSectionReader(src, 0, int64(len(src)))
}

// readerAt adapts a byte slice to io.ReaderAt.
type readerAt []byte

// ReadAt copies bytes starting at off into p. It returns io.EOF when off is
// at or past the end of the slice, or when the copy reaches the end.
func (r readerAt) ReadAt(p []byte, off int64) (n int, err error) {
	size := int64(len(r))
	if off >= size {
		return 0, io.EOF
	}
	n = copy(p, r[off:])
	if off+int64(n) < size {
		return n, nil
	}
	return n, io.EOF
}
// ProxyError is an error that carries the HTTP status the proxy should
// report alongside a human-readable message.
type ProxyError struct {
	// Status is the HTTP status code associated with the failure.
	Status int
	// Message is the text returned by Error.
	Message string
}

// Error implements the error interface by returning Message.
func (e *ProxyError) Error() string {
	return e.Message
}
// UpstreamError wraps a transport-level failure that occurred while talking
// to an upstream remote.
type UpstreamError struct {
	// Err is the underlying failure.
	Err error
}

// Error implements the error interface.
func (e *UpstreamError) Error() string { return fmt.Sprintf("upstream error: %v", e.Err) }

// Unwrap exposes the underlying failure to errors.Is and errors.As.
func (e *UpstreamError) Unwrap() error { return e.Err }

// isNetworkError reports whether err is, or wraps, an *UpstreamError. Using
// errors.As means the result stays correct when a caller adds context with
// fmt.Errorf("...: %w", err). A nil err yields false.
func isNetworkError(err error) bool {
	var upstream *UpstreamError
	return errors.As(err, &upstream)
}