Compare commits

..

2 Commits

Author SHA1 Message Date
unkinben 7b13644421 feat: virtual PyPI repos can merge local + remote members (#51)
ci/woodpecker/tag/docker Pipeline was successful
## Summary
- Virtual engine detects local members and generates indexes in-memory
- MemberIndex.RepoType drives correct URL prefix in merged output
- PyPI merger rewrites links to /api/v1/local/ or /api/v1/remote/ appropriately
- Includes local PyPI support (cherry-picked from #50)

## Test plan
- [x] Upload wheel to local PyPI → install from direct local URL
- [x] Create virtual with local + remote → install from virtual URL
- [x] Both paths produce correct absolute download URLs

Reviewed-on: #51
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-06-23 22:20:05 +10:00
unkinben de96637122 feat: add local PyPI repository support (#50)
## Summary
- Upload Python wheels/sdists to local PyPI repos with filename validation
- PEP 503 simple index computed on-demand from stored files
- Package names normalized per PEP 503 (lowercase, hyphens)
- Overwrites rejected (409 Conflict)

## Test plan
- [x] Build wheel with `uv build` → upload → verify simple index HTML → `uv pip install` from local repo
- [x] Bad filename rejection (400)
- [x] Overwrite rejection (409)
- [x] Hash integrity verification on download

Reviewed-on: #50
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
2026-06-23 22:13:09 +10:00
7 changed files with 285 additions and 28 deletions
+24 -1
View File
@@ -115,10 +115,15 @@ func (h *ProxyHandler) handleLocal(w http.ResponseWriter, r *http.Request) {
return
}
if remote.PackageType == models.PackageTerraform {
switch remote.PackageType {
case models.PackageTerraform:
if h.serveTerraformMirror(w, r, remote, path) {
return
}
case models.PackagePyPI:
if h.servePyPIMirror(w, r, remote, path) {
return
}
}
h.serveLocalFile(w, r, localName, path)
@@ -149,6 +154,24 @@ func (h *ProxyHandler) serveTerraformMirror(w http.ResponseWriter, r *http.Reque
return false
}
func (h *ProxyHandler) servePyPIMirror(w http.ResponseWriter, r *http.Request, remote *models.Remote, path string) bool {
if path == "simple" || path == "simple/" {
h.local.ServePyPIIndex(w, r, remote.Name)
return true
}
if strings.HasPrefix(path, "simple/") {
pkg := strings.TrimPrefix(path, "simple/")
pkg = strings.TrimSuffix(pkg, "/")
if pkg != "" && !strings.Contains(pkg, "/") {
h.local.ServePyPIPackageIndex(w, r, remote.Name, pkg)
return true
}
}
return false
}
func (h *ProxyHandler) serveLocalFile(w http.ResponseWriter, r *http.Request, repoName, path string) {
file, err := h.db.GetLocalFile(r.Context(), repoName, path)
if err != nil {
+172 -1
View File
@@ -1,6 +1,7 @@
package v2
import (
"context"
"encoding/json"
"errors"
"fmt"
@@ -16,6 +17,37 @@ import (
"git.unkin.net/unkin/artifactapi/pkg/models"
)
var pypiFileRe = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._-]*\.(whl|tar\.gz|zip)$`)
var pypiNormalizeRe = regexp.MustCompile(`[-_.]+`)
func pypiNormalize(name string) string {
return strings.ToLower(pypiNormalizeRe.ReplaceAllString(name, "-"))
}
func pypiPackageFromWheel(filename string) string {
parts := strings.SplitN(filename, "-", 3)
if len(parts) < 2 {
return ""
}
return pypiNormalize(parts[0])
}
func pypiPackageFromSdist(filename string) string {
name := filename
for _, suffix := range []string{".tar.gz", ".zip"} {
if strings.HasSuffix(name, suffix) {
name = strings.TrimSuffix(name, suffix)
break
}
}
idx := strings.LastIndex(name, "-")
if idx <= 0 {
return ""
}
return pypiNormalize(name[:idx])
}
var providerZipRe = regexp.MustCompile(
`^terraform-provider-([a-zA-Z0-9_-]+)_([0-9]+\.[0-9]+\.[0-9]+(?:-[a-zA-Z0-9.]+)?)_([a-z0-9]+)_([a-z0-9]+)\.zip$`,
)
@@ -61,9 +93,13 @@ func (h *LocalHandler) upload(w http.ResponseWriter, r *http.Request) {
return
}
if remote.PackageType == models.PackageTerraform {
switch remote.PackageType {
case models.PackageTerraform:
h.uploadTerraformProvider(w, r, remote, filePath)
return
case models.PackagePyPI:
h.uploadPyPI(w, r, remote, filePath)
return
}
h.uploadGeneric(w, r, remote, filePath)
@@ -184,6 +220,141 @@ func (h *LocalHandler) uploadGeneric(w http.ResponseWriter, r *http.Request, rem
})
}
func (h *LocalHandler) uploadPyPI(w http.ResponseWriter, r *http.Request, remote *models.Remote, filePath string) {
filename := filePath
if idx := strings.LastIndex(filePath, "/"); idx >= 0 {
filename = filePath[idx+1:]
}
if !pypiFileRe.MatchString(filename) {
http.Error(w, fmt.Sprintf("filename %q must be a .whl, .tar.gz, or .zip file", filename), http.StatusBadRequest)
return
}
var pkgName string
if strings.HasSuffix(filename, ".whl") {
pkgName = pypiPackageFromWheel(filename)
} else {
pkgName = pypiPackageFromSdist(filename)
}
if pkgName == "" {
http.Error(w, fmt.Sprintf("cannot parse package name from %q", filename), http.StatusBadRequest)
return
}
storagePath := pkgName + "/" + filename
existing, err := h.db.GetLocalFile(r.Context(), remote.Name, storagePath)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if existing != nil {
http.Error(w, fmt.Sprintf("file %q already exists; overwrites are not allowed", storagePath), http.StatusConflict)
return
}
contentType := "application/zip"
if strings.HasSuffix(filename, ".tar.gz") {
contentType = "application/gzip"
}
result, err := h.cas.Store(r.Context(), r.Body, contentType)
if err != nil {
http.Error(w, fmt.Sprintf("store failed: %v", err), http.StatusInternalServerError)
return
}
if err := h.db.UpsertBlob(r.Context(), result.ContentHash, result.S3Key, result.SizeBytes, contentType); err != nil {
http.Error(w, fmt.Sprintf("record blob: %v", err), http.StatusInternalServerError)
return
}
if err := h.db.CreateLocalFile(r.Context(), remote.Name, storagePath, result.ContentHash); err != nil {
if errors.Is(err, database.ErrAlreadyExists) {
http.Error(w, fmt.Sprintf("file %q already exists; overwrites are not allowed", storagePath), http.StatusConflict)
return
}
http.Error(w, fmt.Sprintf("record file: %v", err), http.StatusInternalServerError)
return
}
writeJSON(w, http.StatusCreated, map[string]any{
"package": pkgName,
"filename": filename,
"content_hash": result.ContentHash,
"size_bytes": result.SizeBytes,
})
}
func (h *LocalHandler) ServePyPIIndex(w http.ResponseWriter, r *http.Request, repoName string) {
packages, err := h.db.ListLocalFilePackages(r.Context(), repoName)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
var b strings.Builder
b.WriteString("<!DOCTYPE html>\n<html><body>\n")
for _, pkg := range packages {
fmt.Fprintf(&b, "<a href=\"%s/\">%s</a>\n", pkg, pkg)
}
b.WriteString("</body></html>\n")
w.Header().Set("Content-Type", "text/html")
w.WriteHeader(http.StatusOK)
io.WriteString(w, b.String())
}
func (h *LocalHandler) ServePyPIPackageIndex(w http.ResponseWriter, r *http.Request, repoName, packageName string) {
normalized := pypiNormalize(packageName)
prefix := normalized + "/"
files, err := h.db.ListLocalFilesByPrefix(r.Context(), repoName, prefix)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if len(files) == 0 {
http.Error(w, "not found", http.StatusNotFound)
return
}
var b strings.Builder
b.WriteString("<!DOCTYPE html>\n<html><body>\n")
for _, f := range files {
filename := strings.TrimPrefix(f.FilePath, normalized+"/")
hash := strings.TrimPrefix(f.ContentHash, "sha256:")
fmt.Fprintf(&b, "<a href=\"../../%s/%s#sha256=%s\">%s</a>\n",
normalized, filename, hash, filename)
}
b.WriteString("</body></html>\n")
w.Header().Set("Content-Type", "text/html")
w.WriteHeader(http.StatusOK)
io.WriteString(w, b.String())
}
func (h *LocalHandler) GeneratePyPIPackageHTML(ctx context.Context, repoName, packageName string) ([]byte, error) {
normalized := pypiNormalize(packageName)
prefix := normalized + "/"
files, err := h.db.ListLocalFilesByPrefix(ctx, repoName, prefix)
if err != nil {
return nil, err
}
var b strings.Builder
b.WriteString("<!DOCTYPE html>\n<html><body>\n")
for _, f := range files {
filename := strings.TrimPrefix(f.FilePath, normalized+"/")
hash := strings.TrimPrefix(f.ContentHash, "sha256:")
fmt.Fprintf(&b, "<a href=\"%s/%s#sha256=%s\">%s</a>\n",
normalized, filename, hash, filename)
}
b.WriteString("</body></html>\n")
return []byte(b.String()), nil
}
func (h *LocalHandler) download(w http.ResponseWriter, r *http.Request) {
repoName := chi.URLParam(r, "name")
filePath := chi.URLParam(r, "*")
+23
View File
@@ -99,6 +99,29 @@ func (db *DB) ListLocalFilesByPrefix(ctx context.Context, repoName, prefix strin
return files, rows.Err()
}
func (db *DB) ListLocalFilePackages(ctx context.Context, repoName string) ([]string, error) {
rows, err := db.Pool.Query(ctx, `
SELECT DISTINCT split_part(file_path, '/', 1)
FROM local_files
WHERE repo_name = $1
ORDER BY 1
`, repoName)
if err != nil {
return nil, err
}
defer rows.Close()
var packages []string
for rows.Next() {
var pkg string
if err := rows.Scan(&pkg); err != nil {
return nil, err
}
packages = append(packages, pkg)
}
return packages, rows.Err()
}
func (db *DB) DeleteLocalFile(ctx context.Context, repoName, filePath string) error {
_, err := db.Pool.Exec(ctx, `DELETE FROM local_files WHERE repo_name = $1 AND file_path = $2`, repoName, filePath)
return err
+23 -22
View File
@@ -34,14 +34,15 @@ import (
)
type Server struct {
cfg *config.Config
router chi.Router
db *database.DB
cache *cache.Redis
store *storage.S3
engine *proxy.Engine
virtEngine *virtual.Engine
gc *gc.Collector
cfg *config.Config
router chi.Router
db *database.DB
cache *cache.Redis
store *storage.S3
engine *proxy.Engine
virtEngine *virtual.Engine
localHandler *v2.LocalHandler
gc *gc.Collector
}
func New(cfg *config.Config) (*Server, error) {
@@ -61,17 +62,19 @@ func New(cfg *config.Config) (*Server, error) {
}
engine := proxy.NewEngine(db, redis, s3)
virtEngine := virtual.NewEngine(db, engine)
localHandler := v2.NewLocalHandler(db, s3)
virtEngine := virtual.NewEngine(db, engine, localHandler)
collector := gc.New(db, s3, 1*time.Hour)
s := &Server{
cfg: cfg,
db: db,
cache: redis,
store: s3,
engine: engine,
virtEngine: virtEngine,
gc: collector,
cfg: cfg,
db: db,
cache: redis,
store: s3,
engine: engine,
virtEngine: virtEngine,
localHandler: localHandler,
gc: collector,
}
s.router = s.routes()
@@ -91,9 +94,7 @@ func (s *Server) routes() chi.Router {
r.Get("/health", s.handleHealth)
r.Get("/", s.handleRoot)
localHandler := v2.NewLocalHandler(s.db, s.store)
proxyHandler := v1.NewProxyHandler(s.engine, s.virtEngine, s.db, s.store, localHandler)
proxyHandler := v1.NewProxyHandler(s.engine, s.virtEngine, s.db, s.store, s.localHandler)
r.Mount("/api/v1", proxyHandler.Routes())
remotesHandler := v2.NewRemotesHandler(s.db)
@@ -118,9 +119,9 @@ func (s *Server) routes() chi.Router {
})
r.Route("/remotes/{name}/files", func(r chi.Router) {
r.Put("/*", localHandler.Routes().ServeHTTP)
r.Get("/*", localHandler.Routes().ServeHTTP)
r.Delete("/*", localHandler.Routes().ServeHTTP)
r.Put("/*", s.localHandler.Routes().ServeHTTP)
r.Get("/*", s.localHandler.Routes().ServeHTTP)
r.Delete("/*", s.localHandler.Routes().ServeHTTP)
})
})
+36 -3
View File
@@ -5,6 +5,7 @@ import (
"fmt"
"io"
"log/slog"
"strings"
"sync"
"git.unkin.net/unkin/artifactapi/internal/database"
@@ -13,13 +14,18 @@ import (
"git.unkin.net/unkin/artifactapi/pkg/models"
)
type LocalIndexGenerator interface {
GeneratePyPIPackageHTML(ctx context.Context, repoName, packageName string) ([]byte, error)
}
type Engine struct {
db *database.DB
proxyEngine *proxy.Engine
localGen LocalIndexGenerator
}
func NewEngine(db *database.DB, proxyEngine *proxy.Engine) *Engine {
return &Engine{db: db, proxyEngine: proxyEngine}
func NewEngine(db *database.DB, proxyEngine *proxy.Engine, localGen LocalIndexGenerator) *Engine {
return &Engine{db: db, proxyEngine: proxyEngine, localGen: localGen}
}
func (e *Engine) Fetch(ctx context.Context, virt models.Virtual, path string, proxyBaseURL string) ([]byte, string, error) {
@@ -73,6 +79,16 @@ func (e *Engine) fetchMemberIndexes(ctx context.Context, virt models.Virtual, pa
return
}
if remote.RepoType == models.RepoTypeLocal {
body, err := e.fetchLocalIndex(ctx, *remote, virt.PackageType, path)
if err != nil {
results[idx] = result{err: fmt.Errorf("local index %q: %w", name, err)}
return
}
results[idx] = result{index: MemberIndex{RemoteName: name, RepoType: remote.RepoType, Body: body}}
return
}
prov, err := provider.Get(remote.PackageType)
if err != nil {
results[idx] = result{err: fmt.Errorf("provider %q: %w", remote.PackageType, err)}
@@ -92,7 +108,7 @@ func (e *Engine) fetchMemberIndexes(ctx context.Context, virt models.Virtual, pa
return
}
results[idx] = result{index: MemberIndex{RemoteName: name, Body: body}}
results[idx] = result{index: MemberIndex{RemoteName: name, RepoType: remote.RepoType, Body: body}}
}(i, memberName)
}
@@ -109,3 +125,20 @@ func (e *Engine) fetchMemberIndexes(ctx context.Context, virt models.Virtual, pa
return members, nil
}
func (e *Engine) fetchLocalIndex(ctx context.Context, remote models.Remote, packageType models.PackageType, path string) ([]byte, error) {
switch packageType {
case models.PackagePyPI:
if e.localGen == nil {
return nil, fmt.Errorf("no local index generator configured")
}
parts := strings.SplitN(strings.TrimPrefix(path, "simple/"), "/", 2)
pkgName := strings.TrimSuffix(parts[0], "/")
if pkgName == "" {
return nil, fmt.Errorf("cannot determine package name from path %q", path)
}
return e.localGen.GeneratePyPIPackageHTML(ctx, remote.Name, pkgName)
default:
return nil, fmt.Errorf("local index generation not supported for %q", packageType)
}
}
+1
View File
@@ -8,6 +8,7 @@ import (
type MemberIndex struct {
RemoteName string
RepoType models.RepoType
Body []byte
}
+6 -1
View File
@@ -36,8 +36,13 @@ func (m *PyPIMerger) MergeIndexes(members []MemberIndex, proxyBaseURL string) ([
}
if proxyBaseURL != "" && href != "" {
href = fmt.Sprintf("%s/api/v1/remote/%s/%s",
routePrefix := "remote"
if member.RepoType == "local" {
routePrefix = "local"
}
href = fmt.Sprintf("%s/api/v1/%s/%s/%s",
strings.TrimRight(proxyBaseURL, "/"),
routePrefix,
member.RemoteName,
strings.TrimLeft(href, "/"))
}