Feat/v3 go rewrite (#47)
ci/woodpecker/tag/docker Pipeline was successful

Complete rewrite of ArtifactAPI from Python/FastAPI to Go as a single binary.

Core engine:
- 10 package providers: generic, docker, helm, pypi, npm, rpm, alpine,
  puppet, terraform, goproxy — each with built-in mutable patterns
- Content-addressable storage (SHA256 dedup across all remotes)
- Three-tier caching: Redis (TTL/locks) → S3/MinIO (blobs) → upstream
- Classifier with allowlist/blocklist per-remote (empty = allow all)
- Circuit breaker, conditional revalidation, stale-on-error
- Background garbage collection for orphaned blobs
- Access logging to PostgreSQL

API:
- v1 proxy endpoints (backwards compatible)
- v2 management API: CRUD remotes/virtuals, object browser, stats,
  health, SSE events, probe/test endpoint
- Virtual repos with index merging (Helm YAML + PyPI HTML)

Frontend (React + Vite, separate Dockerfile):
- Dashboard with stats, health indicators, top remotes
- Remotes list with type filter, remote detail with config/patterns
- Object browser with pagination and evict
- Test Remote page: probe any remote path, see headers/size/timing
- Virtuals page with expandable member lists

TUI (Bubble Tea):
- Dashboard, remotes list/detail, object browser, virtuals
- Vim-style navigation; launch with `artifactapi tui --endpoint <url>`

Infrastructure:
- S3 client supports MinIO, Ceph RGW, AWS S3 (minio-go)
- PostgreSQL schema with migrations
- Docker Compose: API + UI + Postgres 17 + Redis 7 + MinIO
- Makefile with Go version check, build/test/lint/fmt/e2e targets
- Distroless Docker image (~15MB)

Testing:
- Unit tests for models, classifier, providers, mergers
- E2E tests with testcontainers-go (real Postgres/Redis/MinIO)

Terraform config:
- All 40 production remotes + helm virtual as HCL
- Provider repo: terraform-provider-artifactapi v0.0.1 (separate)

---------

Co-authored-by: Ben Vincent <ben@unkin.net>
Reviewed-on: #47
This commit was merged in pull request #47.
This commit is contained in:
2026-06-07 19:30:35 +10:00
parent f25bf6cb29
commit b46c116f6b
160 changed files with 11448 additions and 7907 deletions
+153
View File
@@ -0,0 +1,153 @@
package database
import (
"context"
"time"
"git.unkin.net/unkin/artifactapi/pkg/models"
)
// UpsertBlob inserts a blob row keyed by contentHash. If a row with the same
// content_hash already exists the statement does nothing, so the existing
// row's s3_key, size_bytes and content_type are left as they are.
func (db *DB) UpsertBlob(ctx context.Context, contentHash, s3Key string, sizeBytes int64, contentType string) error {
	const stmt = `
INSERT INTO blobs (content_hash, s3_key, size_bytes, content_type)
VALUES ($1, $2, $3, $4)
ON CONFLICT (content_hash) DO NOTHING
`
	if _, err := db.Pool.Exec(ctx, stmt, contentHash, s3Key, sizeBytes, contentType); err != nil {
		return err
	}
	return nil
}
// UpsertArtifact records that path on remoteName currently resolves to
// contentHash. A new (remote_name, path) pair is inserted; for an existing
// pair the content hash and upstream ETag are overwritten, last_fetched_at is
// set to the database's NOW() and fetch_count is incremented by one.
func (db *DB) UpsertArtifact(ctx context.Context, remoteName, path, contentHash, upstreamETag string) error {
	const stmt = `
INSERT INTO artifacts (remote_name, path, content_hash, upstream_etag)
VALUES ($1, $2, $3, $4)
ON CONFLICT (remote_name, path) DO UPDATE SET
content_hash = EXCLUDED.content_hash,
upstream_etag = EXCLUDED.upstream_etag,
last_fetched_at = NOW(),
fetch_count = artifacts.fetch_count + 1
`
	if _, err := db.Pool.Exec(ctx, stmt, remoteName, path, contentHash, upstreamETag); err != nil {
		return err
	}
	return nil
}
// GetArtifact loads the artifact stored for (remoteName, path), joined with
// its blob row so that size and content type are populated as well.
//
// The error produced by Scan is returned unwrapped, including the case where
// no row matches; the returned pointer is nil whenever the error is non-nil.
func (db *DB) GetArtifact(ctx context.Context, remoteName, path string) (*models.Artifact, error) {
	const query = `
SELECT a.id, a.remote_name, a.path, a.content_hash, a.upstream_etag,
a.upstream_last_modified, a.first_seen_at, a.last_fetched_at,
a.last_accessed_at, a.fetch_count, a.access_count,
b.size_bytes, b.content_type
FROM artifacts a
JOIN blobs b ON a.content_hash = b.content_hash
WHERE a.remote_name = $1 AND a.path = $2
`
	art := new(models.Artifact)
	// Destination order mirrors the SELECT list above.
	dest := []any{
		&art.ID, &art.RemoteName, &art.Path, &art.ContentHash, &art.UpstreamETag,
		&art.UpstreamLastModified, &art.FirstSeenAt, &art.LastFetchedAt,
		&art.LastAccessedAt, &art.FetchCount, &art.AccessCount,
		&art.SizeBytes, &art.ContentType,
	}
	if err := db.Pool.QueryRow(ctx, query, remoteName, path).Scan(dest...); err != nil {
		return nil, err
	}
	return art, nil
}
// TouchArtifactAccess marks the artifact at (remoteName, path) as accessed:
// last_accessed_at becomes the database's NOW() and access_count grows by one.
// No error is reported when no row matches; only the Exec error is returned.
func (db *DB) TouchArtifactAccess(ctx context.Context, remoteName, path string) error {
	const stmt = `
UPDATE artifacts SET
last_accessed_at = NOW(),
access_count = access_count + 1
WHERE remote_name = $1 AND path = $2
`
	if _, err := db.Pool.Exec(ctx, stmt, remoteName, path); err != nil {
		return err
	}
	return nil
}
// ListArtifacts returns one page of the artifacts cached for remoteName,
// ordered by path, each joined with its blob's size and content type.
// limit and offset are passed straight through to LIMIT/OFFSET.
//
// The result is a nil slice when the page is empty. A scan failure aborts the
// listing and returns (nil, err).
func (db *DB) ListArtifacts(ctx context.Context, remoteName string, limit, offset int) ([]models.Artifact, error) {
	const query = `
SELECT a.id, a.remote_name, a.path, a.content_hash, a.upstream_etag,
a.upstream_last_modified, a.first_seen_at, a.last_fetched_at,
a.last_accessed_at, a.fetch_count, a.access_count,
b.size_bytes, b.content_type
FROM artifacts a
JOIN blobs b ON a.content_hash = b.content_hash
WHERE a.remote_name = $1
ORDER BY a.path
LIMIT $2 OFFSET $3
`
	rows, err := db.Pool.Query(ctx, query, remoteName, limit, offset)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var page []models.Artifact
	for rows.Next() {
		var item models.Artifact
		// Destination order mirrors the SELECT list above.
		scanErr := rows.Scan(
			&item.ID, &item.RemoteName, &item.Path, &item.ContentHash, &item.UpstreamETag,
			&item.UpstreamLastModified, &item.FirstSeenAt, &item.LastFetchedAt,
			&item.LastAccessedAt, &item.FetchCount, &item.AccessCount,
			&item.SizeBytes, &item.ContentType,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		page = append(page, item)
	}
	// rows.Err reports any failure that ended the iteration early.
	return page, rows.Err()
}
// DeleteArtifact removes the artifact row for (remoteName, path). The blob it
// pointed at is not touched here. Deleting a row that does not exist is not
// reported as an error.
func (db *DB) DeleteArtifact(ctx context.Context, remoteName, path string) error {
	const stmt = `DELETE FROM artifacts WHERE remote_name = $1 AND path = $2`
	if _, err := db.Pool.Exec(ctx, stmt, remoteName, path); err != nil {
		return err
	}
	return nil
}
// InsertAccessLog appends one row to access_log describing a single request:
// which remote and path were asked for, whether it was a cache hit, the size
// served, the upstream latency in milliseconds and the client IP.
func (db *DB) InsertAccessLog(ctx context.Context, remoteName, path string, cacheHit bool, sizeBytes int64, upstreamMS int, clientIP string) error {
	const stmt = `
INSERT INTO access_log (remote_name, path, cache_hit, size_bytes, upstream_ms, client_ip)
VALUES ($1, $2, $3, $4, $5, $6)
`
	values := []any{remoteName, path, cacheHit, sizeBytes, upstreamMS, clientIP}
	if _, err := db.Pool.Exec(ctx, stmt, values...); err != nil {
		return err
	}
	return nil
}
// FindOrphanedBlobs returns every blob row whose content_hash is referenced by
// neither the artifacts table nor the local_files table.
//
// The check uses two NOT EXISTS predicates instead of NOT IN (... UNION ...).
// NOT IN yields no rows at all if the subquery ever produces a NULL, and
// PostgreSQL cannot plan it as an anti-join; NOT EXISTS has neither problem
// and returns the same rows while content_hash is NOT NULL in both tables.
//
// The result is a nil slice when there are no orphans. A scan failure aborts
// the listing and returns (nil, err).
//
// NOTE(review): a blob inserted just before its first artifact row is written
// would also match here — confirm callers tolerate or guard against that.
func (db *DB) FindOrphanedBlobs(ctx context.Context) ([]models.Blob, error) {
	rows, err := db.Pool.Query(ctx, `
		SELECT b.content_hash, b.s3_key, b.size_bytes, b.content_type, b.created_at
		FROM blobs b
		WHERE NOT EXISTS (
			SELECT 1 FROM artifacts a WHERE a.content_hash = b.content_hash
		)
		AND NOT EXISTS (
			SELECT 1 FROM local_files f WHERE f.content_hash = b.content_hash
		)
	`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var blobs []models.Blob
	for rows.Next() {
		var b models.Blob
		if err := rows.Scan(&b.ContentHash, &b.S3Key, &b.SizeBytes, &b.ContentType, &b.CreatedAt); err != nil {
			return nil, err
		}
		blobs = append(blobs, b)
	}
	// rows.Err reports any failure that ended the iteration early.
	return blobs, rows.Err()
}
// DeleteBlob removes the blob row identified by contentHash. Only the
// database row is affected; nothing in this function touches object storage.
// Any error from the DELETE is returned unchanged.
func (db *DB) DeleteBlob(ctx context.Context, contentHash string) error {
	const stmt = `DELETE FROM blobs WHERE content_hash = $1`
	if _, err := db.Pool.Exec(ctx, stmt, contentHash); err != nil {
		return err
	}
	return nil
}
// DeleteColdArtifacts deletes the artifacts of remoteName whose
// last_accessed_at is earlier than now minus olderThan, and returns how many
// rows were removed.
//
// The cutoff is computed from the application clock (time.Now), so a zero or
// negative olderThan places it at or after the current time.
func (db *DB) DeleteColdArtifacts(ctx context.Context, remoteName string, olderThan time.Duration) (int64, error) {
	const stmt = `
DELETE FROM artifacts
WHERE remote_name = $1 AND last_accessed_at < $2
`
	threshold := time.Now().Add(-olderThan)
	result, err := db.Pool.Exec(ctx, stmt, remoteName, threshold)
	if err != nil {
		return 0, err
	}
	return result.RowsAffected(), nil
}
+126
View File
@@ -0,0 +1,126 @@
package database
import (
"context"
"fmt"
"github.com/jackc/pgx/v5/pgxpool"
)
// DB is the handle to the PostgreSQL database. The methods on DB issue their
// queries through its connection pool.
type DB struct {
// Pool is the pgx connection pool; New creates it and Close releases it.
Pool *pgxpool.Pool
}
// New opens a connection pool for dsn, verifies it with a ping and runs the
// schema migrations before handing back a ready-to-use DB.
//
// If the ping or the migrations fail, the pool is closed again and the error
// is returned wrapped with the step that failed.
func New(dsn string) (*DB, error) {
	ctx := context.Background()

	pool, err := pgxpool.New(ctx, dsn)
	if err != nil {
		return nil, fmt.Errorf("connect to postgres: %w", err)
	}

	if pingErr := pool.Ping(ctx); pingErr != nil {
		pool.Close()
		return nil, fmt.Errorf("ping postgres: %w", pingErr)
	}

	handle := &DB{Pool: pool}
	if migrateErr := handle.migrate(); migrateErr != nil {
		pool.Close()
		return nil, fmt.Errorf("run migrations: %w", migrateErr)
	}

	return handle, nil
}
// Close shuts down the underlying connection pool.
func (db *DB) Close() {
	pool := db.Pool
	pool.Close()
}
// migrate creates the tables and indexes the package relies on (remotes,
// virtuals, blobs, artifacts, local_files, access_log). Every statement uses
// IF NOT EXISTS, so existing objects are left in place.
//
// All statements are sent in a single Exec call using a background context.
func (db *DB) migrate() error {
	const schema = `
CREATE TABLE IF NOT EXISTS remotes (
name TEXT PRIMARY KEY,
package_type TEXT NOT NULL,
base_url TEXT NOT NULL,
description TEXT DEFAULT '',
username TEXT DEFAULT '',
password TEXT DEFAULT '',
immutable_ttl INTEGER DEFAULT 0,
mutable_ttl INTEGER DEFAULT 3600,
check_mutable BOOLEAN DEFAULT TRUE,
patterns TEXT[] DEFAULT '{}',
blocklist TEXT[] DEFAULT '{}',
mutable_patterns TEXT[] DEFAULT '{}',
immutable_patterns TEXT[] DEFAULT '{}',
ban_tags_enabled BOOLEAN DEFAULT FALSE,
ban_tags TEXT[] DEFAULT '{}',
quarantine_enabled BOOLEAN DEFAULT FALSE,
quarantine_days INTEGER DEFAULT 3,
stale_on_error BOOLEAN DEFAULT TRUE,
releases_remote TEXT DEFAULT '',
managed_by TEXT DEFAULT '',
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE TABLE IF NOT EXISTS virtuals (
name TEXT PRIMARY KEY,
package_type TEXT NOT NULL,
description TEXT DEFAULT '',
members TEXT[] NOT NULL,
managed_by TEXT DEFAULT '',
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE TABLE IF NOT EXISTS blobs (
content_hash TEXT PRIMARY KEY,
s3_key TEXT NOT NULL,
size_bytes BIGINT NOT NULL,
content_type TEXT DEFAULT 'application/octet-stream',
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE TABLE IF NOT EXISTS artifacts (
id BIGSERIAL PRIMARY KEY,
remote_name TEXT NOT NULL REFERENCES remotes(name) ON DELETE CASCADE,
path TEXT NOT NULL,
content_hash TEXT NOT NULL REFERENCES blobs(content_hash),
upstream_etag TEXT DEFAULT '',
upstream_last_modified TIMESTAMPTZ,
first_seen_at TIMESTAMPTZ DEFAULT NOW(),
last_fetched_at TIMESTAMPTZ DEFAULT NOW(),
last_accessed_at TIMESTAMPTZ DEFAULT NOW(),
fetch_count BIGINT DEFAULT 1,
access_count BIGINT DEFAULT 1,
UNIQUE(remote_name, path)
);
CREATE INDEX IF NOT EXISTS idx_artifacts_remote ON artifacts(remote_name);
CREATE INDEX IF NOT EXISTS idx_artifacts_last_accessed ON artifacts(last_accessed_at);
CREATE TABLE IF NOT EXISTS local_files (
id BIGSERIAL PRIMARY KEY,
repo_name TEXT NOT NULL,
file_path TEXT NOT NULL,
content_hash TEXT NOT NULL REFERENCES blobs(content_hash),
created_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(repo_name, file_path)
);
CREATE TABLE IF NOT EXISTS access_log (
id BIGSERIAL PRIMARY KEY,
remote_name TEXT NOT NULL,
path TEXT NOT NULL,
cache_hit BOOLEAN NOT NULL,
size_bytes BIGINT DEFAULT 0,
upstream_ms INTEGER DEFAULT 0,
client_ip TEXT DEFAULT '',
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_access_log_remote_time ON access_log(remote_name, created_at);
`
	if _, err := db.Pool.Exec(context.Background(), schema); err != nil {
		return err
	}
	return nil
}
+99
View File
@@ -0,0 +1,99 @@
package database
import (
"context"
"git.unkin.net/unkin/artifactapi/pkg/models"
)
// remoteCols is the column list selected from the remotes table by GetRemote
// and ListRemotes. Its order must stay in step with the destination order in
// scanRemote.
const remoteCols = `name, package_type, base_url, description, username, password,
immutable_ttl, mutable_ttl, check_mutable,
patterns, blocklist, mutable_patterns, immutable_patterns,
ban_tags_enabled, ban_tags,
quarantine_enabled, quarantine_days, stale_on_error,
releases_remote, managed_by, created_at, updated_at`
// scanRemote reads one remotes row from scanner into r. The destinations are
// listed in the same order as the columns in remoteCols. scanner is anything
// with a pgx-style Scan method, so both a single row and a row iterator work.
func scanRemote(scanner interface{ Scan(...any) error }, r *models.Remote) error {
	dest := []any{
		&r.Name, &r.PackageType, &r.BaseURL, &r.Description, &r.Username, &r.Password,
		&r.ImmutableTTL, &r.MutableTTL, &r.CheckMutable,
		&r.Patterns, &r.Blocklist, &r.MutablePatterns, &r.ImmutablePatterns,
		&r.BanTagsEnabled, &r.BanTags,
		&r.QuarantineEnabled, &r.QuarantineDays, &r.StaleOnError,
		&r.ReleasesRemote, &r.ManagedBy, &r.CreatedAt, &r.UpdatedAt,
	}
	return scanner.Scan(dest...)
}
// GetRemote loads the remote called name. The scan error is returned
// unwrapped, including the case where no row matches; the returned pointer is
// nil whenever the error is non-nil.
func (db *DB) GetRemote(ctx context.Context, name string) (*models.Remote, error) {
	const query = `SELECT ` + remoteCols + ` FROM remotes WHERE name = $1`

	remote := new(models.Remote)
	if err := scanRemote(db.Pool.QueryRow(ctx, query, name), remote); err != nil {
		return nil, err
	}
	return remote, nil
}
// ListRemotes returns every configured remote, ordered by name. The result is
// a nil slice when the table is empty. A scan failure aborts the listing and
// returns (nil, err).
func (db *DB) ListRemotes(ctx context.Context) ([]models.Remote, error) {
	const query = `SELECT ` + remoteCols + ` FROM remotes ORDER BY name`

	rows, err := db.Pool.Query(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var all []models.Remote
	for rows.Next() {
		var item models.Remote
		if scanErr := scanRemote(rows, &item); scanErr != nil {
			return nil, scanErr
		}
		all = append(all, item)
	}
	// rows.Err reports any failure that ended the iteration early.
	return all, rows.Err()
}
// CreateRemote inserts r as a new row in remotes. created_at and updated_at
// are not supplied and therefore take their column defaults. Any error from
// the INSERT (for example a duplicate name) is returned unchanged.
func (db *DB) CreateRemote(ctx context.Context, r *models.Remote) error {
	const stmt = `
INSERT INTO remotes (
name, package_type, base_url, description, username, password,
immutable_ttl, mutable_ttl, check_mutable,
patterns, blocklist, mutable_patterns, immutable_patterns,
ban_tags_enabled, ban_tags,
quarantine_enabled, quarantine_days, stale_on_error,
releases_remote, managed_by
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20)
`
	// Positional values, in the same order as the column list above.
	values := []any{
		r.Name, r.PackageType, r.BaseURL, r.Description, r.Username, r.Password,
		r.ImmutableTTL, r.MutableTTL, r.CheckMutable,
		r.Patterns, r.Blocklist, r.MutablePatterns, r.ImmutablePatterns,
		r.BanTagsEnabled, r.BanTags,
		r.QuarantineEnabled, r.QuarantineDays, r.StaleOnError,
		r.ReleasesRemote, r.ManagedBy,
	}
	if _, err := db.Pool.Exec(ctx, stmt, values...); err != nil {
		return err
	}
	return nil
}
// UpdateRemote overwrites every editable column of the remote identified by
// r.Name and sets updated_at to the database's NOW(). The name itself is the
// lookup key and is not changed. No error is reported when no row matches.
func (db *DB) UpdateRemote(ctx context.Context, r *models.Remote) error {
	const stmt = `
UPDATE remotes SET
package_type=$2, base_url=$3, description=$4, username=$5, password=$6,
immutable_ttl=$7, mutable_ttl=$8, check_mutable=$9,
patterns=$10, blocklist=$11, mutable_patterns=$12, immutable_patterns=$13,
ban_tags_enabled=$14, ban_tags=$15,
quarantine_enabled=$16, quarantine_days=$17, stale_on_error=$18,
releases_remote=$19, managed_by=$20, updated_at=NOW()
WHERE name=$1
`
	// $1 is the key (name); $2..$20 follow the SET list order.
	values := []any{
		r.Name, r.PackageType, r.BaseURL, r.Description, r.Username, r.Password,
		r.ImmutableTTL, r.MutableTTL, r.CheckMutable,
		r.Patterns, r.Blocklist, r.MutablePatterns, r.ImmutablePatterns,
		r.BanTagsEnabled, r.BanTags,
		r.QuarantineEnabled, r.QuarantineDays, r.StaleOnError,
		r.ReleasesRemote, r.ManagedBy,
	}
	if _, err := db.Pool.Exec(ctx, stmt, values...); err != nil {
		return err
	}
	return nil
}
// DeleteRemote removes the remote called name. Deleting a name that does not
// exist is not reported as an error; only the Exec error is returned.
func (db *DB) DeleteRemote(ctx context.Context, name string) error {
	const stmt = `DELETE FROM remotes WHERE name = $1`
	if _, err := db.Pool.Exec(ctx, stmt, name); err != nil {
		return err
	}
	return nil
}
+78
View File
@@ -0,0 +1,78 @@
package database
import (
"context"
"git.unkin.net/unkin/artifactapi/pkg/models"
)
// GetOverviewStats returns the headline numbers for the whole cache:
//
//   - TotalRemotes: number of rows in remotes
//   - TotalObjects: number of artifacts that have a blob row
//   - TotalBytes: sum of blob sizes over those artifacts (a blob shared by
//     several artifacts is counted once per artifact)
//   - TotalBlobsDeduped: artifact count minus distinct content hashes, i.e.
//     how many artifact rows reuse a blob another row already references
//
// All four values come from a single statement, so they are computed against
// one database snapshot and need one round trip. Issuing them as separate
// statements would let concurrent writes make the numbers disagree.
//
// On error the returned pointer is nil.
func (db *DB) GetOverviewStats(ctx context.Context) (*models.OverviewStats, error) {
	var stats models.OverviewStats

	// COUNT(*) never yields NULL, so only SUM needs COALESCE (it is NULL
	// when there are no artifacts).
	err := db.Pool.QueryRow(ctx, `
		SELECT
			(SELECT COUNT(*) FROM remotes),
			o.objects,
			o.bytes,
			(SELECT COUNT(*) - COUNT(DISTINCT content_hash) FROM artifacts)
		FROM (
			SELECT COUNT(*) AS objects, COALESCE(SUM(b.size_bytes), 0) AS bytes
			FROM artifacts a JOIN blobs b ON a.content_hash = b.content_hash
		) o
	`).Scan(&stats.TotalRemotes, &stats.TotalObjects, &stats.TotalBytes, &stats.TotalBlobsDeduped)
	if err != nil {
		return nil, err
	}
	return &stats, nil
}
// RemoteStatRow is one row of the per-remote statistics returned by
// GetTopRemotes.
type RemoteStatRow struct {
// Name is the remote's name.
Name string `json:"name"`
// ObjectCount is the number of artifacts stored for the remote.
ObjectCount int64 `json:"object_count"`
// TotalBytes is the summed blob size of those artifacts.
TotalBytes int64 `json:"total_bytes"`
// Requests30d is the number of access_log rows for the remote from the last 30 days.
Requests30d int64 `json:"requests_30d"`
}
// GetTopRemotes returns up to limit remotes ranked by the total size of their
// cached artifacts, largest first. Each row also carries the artifact count
// and the number of access_log entries from the last 30 days. Remotes with no
// artifacts or no requests are included with zeros.
//
// Ties on total size are broken by remote name. Without a tie-breaker the
// combination of ORDER BY and LIMIT would return an unpredictable subset and
// order whenever several remotes have the same size (for example all zero).
//
// The result is a nil slice when there are no remotes. A scan failure aborts
// the listing and returns (nil, err).
func (db *DB) GetTopRemotes(ctx context.Context, limit int) ([]RemoteStatRow, error) {
	rows, err := db.Pool.Query(ctx, `
		SELECT r.name,
			COALESCE(o.cnt, 0) AS object_count,
			COALESCE(o.total_bytes, 0) AS total_bytes,
			COALESCE(l.req_count, 0) AS requests_30d
		FROM remotes r
		LEFT JOIN (
			SELECT a.remote_name, COUNT(*) AS cnt, SUM(b.size_bytes) AS total_bytes
			FROM artifacts a JOIN blobs b ON a.content_hash = b.content_hash
			GROUP BY a.remote_name
		) o ON r.name = o.remote_name
		LEFT JOIN (
			SELECT remote_name, COUNT(*) AS req_count
			FROM access_log
			WHERE created_at > NOW() - INTERVAL '30 days'
			GROUP BY remote_name
		) l ON r.name = l.remote_name
		ORDER BY COALESCE(o.total_bytes, 0) DESC, r.name
		LIMIT $1
	`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var result []RemoteStatRow
	for rows.Next() {
		var r RemoteStatRow
		if err := rows.Scan(&r.Name, &r.ObjectCount, &r.TotalBytes, &r.Requests30d); err != nil {
			return nil, err
		}
		result = append(result, r)
	}
	// rows.Err reports any failure that ended the iteration early.
	return result, rows.Err()
}
+64
View File
@@ -0,0 +1,64 @@
package database
import (
"context"
"git.unkin.net/unkin/artifactapi/pkg/models"
)
// GetVirtual loads the virtual repository called name. The Scan error is
// returned unwrapped, including the case where no row matches; the returned
// pointer is nil whenever the error is non-nil.
func (db *DB) GetVirtual(ctx context.Context, name string) (*models.Virtual, error) {
	const query = `
SELECT name, package_type, description, members, managed_by, created_at, updated_at
FROM virtuals WHERE name = $1
`
	virt := new(models.Virtual)
	// Destination order mirrors the SELECT list above.
	dest := []any{
		&virt.Name, &virt.PackageType, &virt.Description, &virt.Members,
		&virt.ManagedBy, &virt.CreatedAt, &virt.UpdatedAt,
	}
	if err := db.Pool.QueryRow(ctx, query, name).Scan(dest...); err != nil {
		return nil, err
	}
	return virt, nil
}
// ListVirtuals returns every virtual repository, ordered by name. The result
// is a nil slice when the table is empty. A scan failure aborts the listing
// and returns (nil, err).
func (db *DB) ListVirtuals(ctx context.Context) ([]models.Virtual, error) {
	const query = `
SELECT name, package_type, description, members, managed_by, created_at, updated_at
FROM virtuals ORDER BY name
`
	rows, err := db.Pool.Query(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var all []models.Virtual
	for rows.Next() {
		var item models.Virtual
		// Destination order mirrors the SELECT list above.
		scanErr := rows.Scan(
			&item.Name, &item.PackageType, &item.Description, &item.Members,
			&item.ManagedBy, &item.CreatedAt, &item.UpdatedAt,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		all = append(all, item)
	}
	// rows.Err reports any failure that ended the iteration early.
	return all, rows.Err()
}
// CreateVirtual inserts v as a new row in virtuals. created_at and updated_at
// are not supplied and therefore take their column defaults. Any error from
// the INSERT (for example a duplicate name) is returned unchanged.
func (db *DB) CreateVirtual(ctx context.Context, v *models.Virtual) error {
	const stmt = `
INSERT INTO virtuals (name, package_type, description, members, managed_by)
VALUES ($1, $2, $3, $4, $5)
`
	values := []any{v.Name, v.PackageType, v.Description, v.Members, v.ManagedBy}
	if _, err := db.Pool.Exec(ctx, stmt, values...); err != nil {
		return err
	}
	return nil
}
// UpdateVirtual overwrites the package type, description, members and
// managed_by of the virtual identified by v.Name and sets updated_at to the
// database's NOW(). No error is reported when no row matches.
func (db *DB) UpdateVirtual(ctx context.Context, v *models.Virtual) error {
	const stmt = `
UPDATE virtuals SET
package_type=$2, description=$3, members=$4, managed_by=$5, updated_at=NOW()
WHERE name=$1
`
	// $1 is the key (name); $2..$5 follow the SET list order.
	values := []any{v.Name, v.PackageType, v.Description, v.Members, v.ManagedBy}
	if _, err := db.Pool.Exec(ctx, stmt, values...); err != nil {
		return err
	}
	return nil
}
// DeleteVirtual removes the virtual repository called name. Deleting a name
// that does not exist is not reported as an error.
func (db *DB) DeleteVirtual(ctx context.Context, name string) error {
	const stmt = `DELETE FROM virtuals WHERE name = $1`
	if _, err := db.Pool.Exec(ctx, stmt, name); err != nil {
		return err
	}
	return nil
}