feat: add local RPM repository with on-demand repodata
ci/woodpecker/pr/pre-commit Pipeline failed
ci/woodpecker/pr/test Pipeline was successful
ci/woodpecker/pr/build Pipeline was successful

RPMs can now be uploaded to local repos. Metadata is parsed asynchronously
after upload using cavaliergopher/rpm and stored in the rpm_metadata table.
Repodata (repomd.xml, primary.xml.gz, filelists.xml.gz, other.xml.gz) is
generated on demand from the DB — none of it is stored in S3.

- RPM provider implements LocalUploader (validates .rpm extension,
  stores under Packages/)
- RPM provider implements PostUploadHook (async goroutine parses RPM
  headers, extracts name/version/arch/deps/etc into rpm_metadata)
- RPM provider implements LocalIndexer (serves repodata/* paths by
  querying rpm_metadata and generating XML on the fly)
- New provider interfaces: PostUploadHook, BlobReader, MetadataStore,
  RPMMetadataReader
- New rpm_metadata table with JSONB columns for requires/provides/
  files/changelogs

Tested e2e: upload cowsay RPM → repodata generated → dnf install
from local repo
This commit is contained in:
2026-06-23 23:08:59 +10:00
parent 3a6721c2a7
commit bb172276ba
7 changed files with 627 additions and 2 deletions
+31
View File
@@ -124,6 +124,37 @@ func (db *DB) migrate() error {
CREATE INDEX IF NOT EXISTS idx_access_log_remote_time ON access_log(remote_name, created_at);
ALTER TABLE remotes ADD COLUMN IF NOT EXISTS repo_type TEXT DEFAULT 'remote';
CREATE TABLE IF NOT EXISTS rpm_metadata (
id BIGSERIAL PRIMARY KEY,
repo_name TEXT NOT NULL,
file_path TEXT NOT NULL,
content_hash TEXT NOT NULL,
name TEXT NOT NULL,
epoch INTEGER DEFAULT 0,
version TEXT NOT NULL,
release TEXT NOT NULL,
arch TEXT NOT NULL,
summary TEXT DEFAULT '',
description TEXT DEFAULT '',
rpm_size BIGINT DEFAULT 0,
installed_size BIGINT DEFAULT 0,
license TEXT DEFAULT '',
vendor TEXT DEFAULT '',
build_group TEXT DEFAULT '',
build_host TEXT DEFAULT '',
source_rpm TEXT DEFAULT '',
url TEXT DEFAULT '',
packager TEXT DEFAULT '',
requires JSONB DEFAULT '[]',
provides JSONB DEFAULT '[]',
files JSONB DEFAULT '[]',
changelogs JSONB DEFAULT '[]',
created_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(repo_name, file_path)
);
CREATE INDEX IF NOT EXISTS idx_rpm_metadata_repo ON rpm_metadata(repo_name);
`)
return err
}
+129
View File
@@ -0,0 +1,129 @@
package database
import (
	"context"
	"encoding/json"
	"fmt"

	"git.unkin.net/unkin/artifactapi/internal/provider"
)
// InsertRPMMetadata stores the parsed header data of one RPM in the
// rpm_metadata table.
//
// The Requires, Provides, Files and Changelogs fields of meta are JSON-encoded
// and bound to the requires/provides/files/changelogs columns. If a row with
// the same (repo_name, file_path) already exists, the statement does nothing
// and the existing row is left unchanged.
//
// It returns an error if any of those fields cannot be JSON-encoded or if the
// INSERT itself fails.
func (db *DB) InsertRPMMetadata(ctx context.Context, meta *provider.RPMMetadata) error {
	// Encoding failures are reported instead of being discarded, so a row is
	// never written with a missing or truncated dependency/file list.
	requiresJSON, err := json.Marshal(meta.Requires)
	if err != nil {
		return fmt.Errorf("encoding requires for %q: %w", meta.FilePath, err)
	}
	providesJSON, err := json.Marshal(meta.Provides)
	if err != nil {
		return fmt.Errorf("encoding provides for %q: %w", meta.FilePath, err)
	}
	filesJSON, err := json.Marshal(meta.Files)
	if err != nil {
		return fmt.Errorf("encoding files for %q: %w", meta.FilePath, err)
	}
	changelogsJSON, err := json.Marshal(meta.Changelogs)
	if err != nil {
		return fmt.Errorf("encoding changelogs for %q: %w", meta.FilePath, err)
	}

	_, err = db.Pool.Exec(ctx, `
INSERT INTO rpm_metadata (
repo_name, file_path, content_hash,
name, epoch, version, release, arch,
summary, description, rpm_size, installed_size,
license, vendor, build_group, build_host, source_rpm, url, packager,
requires, provides, files, changelogs
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23)
ON CONFLICT (repo_name, file_path) DO NOTHING
`,
		meta.RepoName, meta.FilePath, meta.ContentHash,
		meta.Name, meta.Epoch, meta.Version, meta.Release, meta.Arch,
		meta.Summary, meta.Description, meta.RPMSize, meta.InstalledSize,
		meta.License, meta.Vendor, meta.Group, meta.BuildHost, meta.SourceRPM, meta.URL, meta.Packager,
		requiresJSON, providesJSON, filesJSON, changelogsJSON,
	)
	return err
}
// RPMMetadataRow holds one row read from the rpm_metadata table.
//
// The four JSON fields are kept as raw, undecoded JSON; decoding them is left
// to the caller.
type RPMMetadataRow struct {
	// Location and content identity of the package file.
	RepoName, FilePath, ContentHash string

	// Package identity.
	Name                   string
	Epoch                  int
	Version, Release, Arch string

	// Descriptive text.
	Summary, Description string

	// Sizes as stored in the rpm_size and installed_size columns.
	RPMSize, InstalledSize int64

	// Remaining header fields. Group is read from the build_group column.
	License, Vendor, Group, BuildHost, SourceRPM, URL, Packager string

	// Raw contents of the requires/provides/files/changelogs JSONB columns.
	Requires, Provides, Files, Changelogs json.RawMessage
}
// ListRPMMetadataEntries returns the metadata of every RPM recorded for
// repoName as provider.RPMMetadata values, in the order produced by
// ListRPMMetadata.
//
// Scalar columns are copied across unchanged. The requires, provides, files
// and changelogs columns are JSON-decoded into the corresponding fields; an
// empty (NULL) column leaves the field at its zero value.
//
// It returns an error if the query fails or if any stored JSON document cannot
// be decoded. In that case no entries are returned.
func (db *DB) ListRPMMetadataEntries(ctx context.Context, repoName string) ([]provider.RPMMetadata, error) {
	rows, err := db.ListRPMMetadata(ctx, repoName)
	if err != nil {
		return nil, err
	}

	result := make([]provider.RPMMetadata, len(rows))
	for i := range rows {
		r := &rows[i]
		meta := provider.RPMMetadata{
			RepoName:      r.RepoName,
			FilePath:      r.FilePath,
			ContentHash:   r.ContentHash,
			Name:          r.Name,
			Epoch:         r.Epoch,
			Version:       r.Version,
			Release:       r.Release,
			Arch:          r.Arch,
			Summary:       r.Summary,
			Description:   r.Description,
			RPMSize:       r.RPMSize,
			InstalledSize: r.InstalledSize,
			License:       r.License,
			Vendor:        r.Vendor,
			Group:         r.Group,
			BuildHost:     r.BuildHost,
			SourceRPM:     r.SourceRPM,
			URL:           r.URL,
			Packager:      r.Packager,
		}

		columns := []struct {
			name string
			raw  json.RawMessage
			dst  interface{}
		}{
			{"requires", r.Requires, &meta.Requires},
			{"provides", r.Provides, &meta.Provides},
			{"files", r.Files, &meta.Files},
			{"changelogs", r.Changelogs, &meta.Changelogs},
		}
		for _, c := range columns {
			// json.Unmarshal rejects empty input, so an empty column is
			// treated as "no data" rather than as a decoding failure.
			if len(c.raw) == 0 {
				continue
			}
			if err := json.Unmarshal(c.raw, c.dst); err != nil {
				return nil, fmt.Errorf("decoding %s of %q in repo %q: %w", c.name, r.FilePath, r.RepoName, err)
			}
		}
		result[i] = meta
	}
	return result, nil
}
// ListRPMMetadata reads the rpm_metadata rows whose repo_name equals repoName,
// ordered by name, epoch, version, release and arch.
//
// The requires, provides, files and changelogs columns are returned as raw
// JSON in the row's json.RawMessage fields. The returned slice is nil when the
// query yields no rows. A query or scan failure returns a nil slice and the
// error; an error reported by the row iterator after the loop is returned
// together with the rows collected so far.
func (db *DB) ListRPMMetadata(ctx context.Context, repoName string) ([]RPMMetadataRow, error) {
	const query = `
SELECT repo_name, file_path, content_hash,
name, epoch, version, release, arch,
summary, description, rpm_size, installed_size,
license, vendor, build_group, build_host, source_rpm, url, packager,
requires, provides, files, changelogs
FROM rpm_metadata
WHERE repo_name = $1
ORDER BY name, epoch, version, release, arch
`
	cursor, err := db.Pool.Query(ctx, query, repoName)
	if err != nil {
		return nil, err
	}
	defer cursor.Close()

	var entries []RPMMetadataRow
	for cursor.Next() {
		var entry RPMMetadataRow
		// Destination order must mirror the SELECT column list above.
		scanErr := cursor.Scan(
			&entry.RepoName, &entry.FilePath, &entry.ContentHash,
			&entry.Name, &entry.Epoch, &entry.Version, &entry.Release, &entry.Arch,
			&entry.Summary, &entry.Description, &entry.RPMSize, &entry.InstalledSize,
			&entry.License, &entry.Vendor, &entry.Group, &entry.BuildHost, &entry.SourceRPM, &entry.URL, &entry.Packager,
			&entry.Requires, &entry.Provides, &entry.Files, &entry.Changelogs,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		entries = append(entries, entry)
	}
	return entries, cursor.Err()
}