From bb81eafa711dd393e8aa113dd9d58f940ad3223a Mon Sep 17 00:00:00 2001
From: Ben Vincent
Date: Tue, 23 Jun 2026 22:04:12 +1000
Subject: [PATCH] feat: add local PyPI repository support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Upload Python wheels and sdists to local PyPI repos. The simple index
(PEP 503) is computed on-demand from stored files.

- Upload validates .whl/.tar.gz/.zip filenames, parses and normalizes
  package names per PEP 503, stores under {package}/{filename}
- GET /api/v1/local/{name}/simple/ serves root index listing all packages
- GET /api/v1/local/{name}/simple/{pkg}/ serves per-package file listing
  with sha256 hashes for integrity verification
- Files are downloadable at /api/v1/local/{name}/{package}/{filename}
- Overwrites rejected with 409

Tested e2e: uv build wheel → upload → uv pip install from local repo
---
 internal/api/v1/proxy.go         |  28 ++++-
 internal/api/v2/local.go         | 193 ++++++++++++++++++++++++++++++-
 internal/database/local_files.go |  25 +++++
 3 files changed, 244 insertions(+), 2 deletions(-)

diff --git a/internal/api/v1/proxy.go b/internal/api/v1/proxy.go
index 5f0593b..057c429 100644
--- a/internal/api/v1/proxy.go
+++ b/internal/api/v1/proxy.go
@@ -115,10 +115,15 @@ func (h *ProxyHandler) handleLocal(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	if remote.PackageType == models.PackageTerraform {
+	switch remote.PackageType {
+	case models.PackageTerraform:
 		if h.serveTerraformMirror(w, r, remote, path) {
 			return
 		}
+	case models.PackagePyPI:
+		if h.servePyPIMirror(w, r, remote, path) {
+			return
+		}
 	}
 
 	h.serveLocalFile(w, r, localName, path)
@@ -149,6 +154,27 @@ func (h *ProxyHandler) serveTerraformMirror(w http.ResponseWriter, r *http.Reque
 	return false
 }
 
+// servePyPIMirror dispatches simple-index requests for a local PyPI repo.
+// It handles "simple" / "simple/" (root index) and "simple/{pkg}" with an
+// optional trailing slash (package index), and reports whether it did so.
+func (h *ProxyHandler) servePyPIMirror(w http.ResponseWriter, r *http.Request, remote *models.Remote, path string) bool {
+	if path == "simple" || path == "simple/" {
+		h.local.ServePyPIIndex(w, r, remote.Name)
+		return true
+	}
+
+	if strings.HasPrefix(path, "simple/") {
+		pkg := strings.TrimPrefix(path, "simple/")
+		pkg = strings.TrimSuffix(pkg, "/")
+		if pkg != "" && !strings.Contains(pkg, "/") {
+			h.local.ServePyPIPackageIndex(w, r, remote.Name, pkg)
+			return true
+		}
+	}
+
+	return false
+}
+
 func (h *ProxyHandler) serveLocalFile(w http.ResponseWriter, r *http.Request, repoName, path string) {
 	file, err := h.db.GetLocalFile(r.Context(), repoName, path)
 	if err != nil {
diff --git a/internal/api/v2/local.go b/internal/api/v2/local.go
index d0b96eb..6cb6157 100644
--- a/internal/api/v2/local.go
+++ b/internal/api/v2/local.go
@@ -1,6 +1,7 @@
 package v2
 
 import (
+	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -16,6 +17,48 @@ import (
 	"git.unkin.net/unkin/artifactapi/pkg/models"
 )
 
+// pypiFileRe matches upload filenames accepted for PyPI repositories:
+// a wheel (.whl) or an sdist archive (.tar.gz or .zip).
+var pypiFileRe = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._-]*\.(whl|tar\.gz|zip)$`)
+
+// pypiNormalizeRe matches runs of "-", "_" and "." in a package name.
+var pypiNormalizeRe = regexp.MustCompile(`[-_.]+`)
+
+// pypiNormalize lowercases name and collapses each run of "-", "_" or "."
+// into a single "-" (the PEP 503 normalized form).
+func pypiNormalize(name string) string {
+	return strings.ToLower(pypiNormalizeRe.ReplaceAllString(name, "-"))
+}
+
+// pypiPackageFromWheel returns the normalized package name of a wheel
+// filename, i.e. everything before the first "-". It returns "" when the
+// filename contains no "-".
+func pypiPackageFromWheel(filename string) string {
+	parts := strings.SplitN(filename, "-", 3)
+	if len(parts) < 2 {
+		return ""
+	}
+	return pypiNormalize(parts[0])
+}
+
+// pypiPackageFromSdist returns the normalized package name of an sdist
+// filename: the part before the last "-" once a ".tar.gz" or ".zip"
+// suffix is removed. It returns "" when no name precedes that "-".
+func pypiPackageFromSdist(filename string) string {
+	name := filename
+	for _, suffix := range []string{".tar.gz", ".zip"} {
+		if strings.HasSuffix(name, suffix) {
+			name = strings.TrimSuffix(name, suffix)
+			break
+		}
+	}
+	idx := strings.LastIndex(name, "-")
+	if idx <= 0 {
+		return ""
+	}
+	return pypiNormalize(name[:idx])
+}
+
 var providerZipRe = regexp.MustCompile(
 	`^terraform-provider-([a-zA-Z0-9_-]+)_([0-9]+\.[0-9]+\.[0-9]+(?:-[a-zA-Z0-9.]+)?)_([a-z0-9]+)_([a-z0-9]+)\.zip$`,
 )
@@ -61,9 +104,13 @@ func (h *LocalHandler) upload(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	if remote.PackageType == models.PackageTerraform {
+	switch remote.PackageType {
+	case models.PackageTerraform:
 		h.uploadTerraformProvider(w, r, remote, filePath)
 		return
+	case models.PackagePyPI:
+		h.uploadPyPI(w, r, remote, filePath)
+		return
 	}
 
 	h.uploadGeneric(w, r, remote, filePath)
@@ -184,6 +231,150 @@ func (h *LocalHandler) uploadGeneric(w http.ResponseWriter, r *http.Request, rem
 	})
 }
 
+// uploadPyPI stores an uploaded wheel or sdist under {package}/{filename}.
+// The package name is parsed from the filename and normalized; an invalid
+// filename yields 400 and an already existing file yields 409.
+func (h *LocalHandler) uploadPyPI(w http.ResponseWriter, r *http.Request, remote *models.Remote, filePath string) {
+	// Only the final path segment is the filename; any directory part is ignored.
+	filename := filePath
+	if idx := strings.LastIndex(filePath, "/"); idx >= 0 {
+		filename = filePath[idx+1:]
+	}
+
+	if !pypiFileRe.MatchString(filename) {
+		http.Error(w, fmt.Sprintf("filename %q must be a .whl, .tar.gz, or .zip file", filename), http.StatusBadRequest)
+		return
+	}
+
+	var pkgName string
+	if strings.HasSuffix(filename, ".whl") {
+		pkgName = pypiPackageFromWheel(filename)
+	} else {
+		pkgName = pypiPackageFromSdist(filename)
+	}
+	if pkgName == "" {
+		http.Error(w, fmt.Sprintf("cannot parse package name from %q", filename), http.StatusBadRequest)
+		return
+	}
+
+	storagePath := pkgName + "/" + filename
+
+	existing, err := h.db.GetLocalFile(r.Context(), remote.Name, storagePath)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	if existing != nil {
+		http.Error(w, fmt.Sprintf("file %q already exists; overwrites are not allowed", storagePath), http.StatusConflict)
+		return
+	}
+
+	contentType := "application/zip"
+	if strings.HasSuffix(filename, ".tar.gz") {
+		contentType = "application/gzip"
+	}
+
+	result, err := h.cas.Store(r.Context(), r.Body, contentType)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("store failed: %v", err), http.StatusInternalServerError)
+		return
+	}
+
+	if err := h.db.UpsertBlob(r.Context(), result.ContentHash, result.S3Key, result.SizeBytes, contentType); err != nil {
+		http.Error(w, fmt.Sprintf("record blob: %v", err), http.StatusInternalServerError)
+		return
+	}
+
+	// The existence check above is not atomic, so a duplicate can still surface here.
+	if err := h.db.CreateLocalFile(r.Context(), remote.Name, storagePath, result.ContentHash); err != nil {
+		if errors.Is(err, database.ErrAlreadyExists) {
+			http.Error(w, fmt.Sprintf("file %q already exists; overwrites are not allowed", storagePath), http.StatusConflict)
+			return
+		}
+		http.Error(w, fmt.Sprintf("record file: %v", err), http.StatusInternalServerError)
+		return
+	}
+
+	writeJSON(w, http.StatusCreated, map[string]any{
+		"package":      pkgName,
+		"filename":     filename,
+		"content_hash": result.ContentHash,
+		"size_bytes":   result.SizeBytes,
+	})
+}
+
+// ServePyPIIndex writes the root simple index for repoName: an HTML page
+// with one link per package returned by ListLocalFilePackages.
+func (h *LocalHandler) ServePyPIIndex(w http.ResponseWriter, r *http.Request, repoName string) {
+	packages, err := h.db.ListLocalFilePackages(r.Context(), repoName)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	var b strings.Builder
+	b.WriteString("<!DOCTYPE html>\n<html><body>\n")
+	for _, pkg := range packages {
+		fmt.Fprintf(&b, "<a href=\"%s/\">%s</a>\n", pkg, pkg)
+	}
+	b.WriteString("</body></html>\n")
+
+	w.Header().Set("Content-Type", "text/html")
+	w.WriteHeader(http.StatusOK)
+	io.WriteString(w, b.String())
+}
+
+// ServePyPIPackageIndex writes the simple index page for one package,
+// listing every stored file under the normalized package name. It responds
+// with 404 when the package has no files.
+func (h *LocalHandler) ServePyPIPackageIndex(w http.ResponseWriter, r *http.Request, repoName, packageName string) {
+	normalized := pypiNormalize(packageName)
+	prefix := normalized + "/"
+	files, err := h.db.ListLocalFilesByPrefix(r.Context(), repoName, prefix)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	if len(files) == 0 {
+		http.Error(w, "not found", http.StatusNotFound)
+		return
+	}
+
+	body := h.generatePyPIPackageHTML(normalized, files)
+	w.Header().Set("Content-Type", "text/html")
+	w.WriteHeader(http.StatusOK)
+	w.Write(body)
+}
+
+// GeneratePyPIPackageHTML returns the per-package simple index page as
+// bytes without writing an HTTP response.
+func (h *LocalHandler) GeneratePyPIPackageHTML(ctx context.Context, repoName, packageName string) ([]byte, error) {
+	normalized := pypiNormalize(packageName)
+	prefix := normalized + "/"
+	files, err := h.db.ListLocalFilesByPrefix(ctx, repoName, prefix)
+	if err != nil {
+		return nil, err
+	}
+	return h.generatePyPIPackageHTML(normalized, files), nil
+}
+
+// generatePyPIPackageHTML renders one anchor per file. Each href is relative
+// to the ".../simple/{package}/" index URL (trailing slash included) and
+// carries the file's sha256 digest as a URL fragment.
+func (h *LocalHandler) generatePyPIPackageHTML(packageName string, files []database.LocalFile) []byte {
+	var b strings.Builder
+	b.WriteString("<!DOCTYPE html>\n<html><body>\n")
+	for _, f := range files {
+		filename := strings.TrimPrefix(f.FilePath, packageName+"/")
+		hash := strings.TrimPrefix(f.ContentHash, "sha256:")
+		fmt.Fprintf(&b, "<a href=\"../../%s/%s#sha256=%s\">%s</a>\n",
+			packageName, filename, hash, filename)
+	}
+	b.WriteString("</body></html>\n")
+	return []byte(b.String())
+}
+
 func (h *LocalHandler) download(w http.ResponseWriter, r *http.Request) {
 	repoName := chi.URLParam(r, "name")
 	filePath := chi.URLParam(r, "*")
diff --git a/internal/database/local_files.go b/internal/database/local_files.go
index b5ba26a..7039eef 100644
--- a/internal/database/local_files.go
+++ b/internal/database/local_files.go
@@ -99,6 +99,31 @@ func (db *DB) ListLocalFilesByPrefix(ctx context.Context, repoName, prefix strin
 	return files, rows.Err()
 }
 
+// ListLocalFilePackages returns the distinct first path segments of all
+// files stored in repoName, sorted ascending.
+func (db *DB) ListLocalFilePackages(ctx context.Context, repoName string) ([]string, error) {
+	rows, err := db.Pool.Query(ctx, `
+		SELECT DISTINCT split_part(file_path, '/', 1)
+		FROM local_files
+		WHERE repo_name = $1
+		ORDER BY 1
+	`, repoName)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var packages []string
+	for rows.Next() {
+		var pkg string
+		if err := rows.Scan(&pkg); err != nil {
+			return nil, err
+		}
+		packages = append(packages, pkg)
+	}
+	return packages, rows.Err()
+}
+
 func (db *DB) DeleteLocalFile(ctx context.Context, repoName, filePath string) error {
 	_, err := db.Pool.Exec(ctx, `DELETE FROM local_files WHERE repo_name = $1 AND file_path = $2`, repoName, filePath)
 	return err