From f61ab99ae8de536184df022aea810dfccf30d3db Mon Sep 17 00:00:00 2001 From: Ben Vincent Date: Thu, 2 Jul 2026 22:24:49 +1000 Subject: [PATCH] fix: set timeouts on the upstream HTTP client (#83) Fixes #67 ## Why The proxy used `http.DefaultClient` for all upstream GET/HEAD and bearer-token requests. It has no timeouts, so a slow or hung upstream holds a goroutine and connection indefinitely. ## Changes - Add a shared `upstreamClient` (`internal/proxy/httpclient.go`) with dial, TLS-handshake, response-header and idle-connection timeouts, plus connection pooling. - Deliberately no overall `Client.Timeout`, so large artifact bodies can still stream; total time is bounded by the request context. - Route all four upstream calls in the engine through it. ## Validation - `make e2e` passes. Reviewed-on: https://git.unkin.net/unkin/artifactapi/pulls/83 Co-authored-by: Ben Vincent Co-committed-by: Ben Vincent --- e2e/management_test.go | 24 ++++++++++ internal/database/postgres.go | 3 ++ internal/database/remotes.go | 19 +++++--- internal/proxy/engine.go | 8 ++-- internal/proxy/httpclient.go | 83 +++++++++++++++++++++++++++++++++++ pkg/models/remote.go | 5 +++ 6 files changed, 133 insertions(+), 9 deletions(-) create mode 100644 internal/proxy/httpclient.go diff --git a/e2e/management_test.go b/e2e/management_test.go index ce05963..a5b2467 100644 --- a/e2e/management_test.go +++ b/e2e/management_test.go @@ -24,6 +24,30 @@ func TestRoot(t *testing.T) { } } +func TestRemoteUpstreamTimeouts(t *testing.T) { + createRemote(t, `{ + "name": "timeout-test", + "package_type": "generic", + "base_url": "https://example.com", + "stale_on_error": true, + "upstream_dial_timeout": 3, + "upstream_tls_timeout": 4, + "upstream_response_header_timeout": 5 + }`) + defer deleteRemote(t, "timeout-test") + + remote := getJSON(t, apiURL("/api/v2/remotes/timeout-test")) + for field, want := range map[string]float64{ + "upstream_dial_timeout": 3, + "upstream_tls_timeout": 4, + "upstream_response_header_timeout": 5, + } { + if got, _ := remote[field].(float64); got != want { + t.Errorf("%s: got %v, want %v", field, remote[field], want) + } + } +} + func TestRemoteCRUD(t *testing.T) { createRemote(t, `{ "name": "test-generic", diff --git a/internal/database/postgres.go b/internal/database/postgres.go index 1091b1b..32cc528 100644 --- a/internal/database/postgres.go +++ b/internal/database/postgres.go @@ -124,6 +124,9 @@ func (db *DB) migrate() error { CREATE INDEX IF NOT EXISTS idx_access_log_remote_time ON access_log(remote_name, created_at); ALTER TABLE remotes ADD COLUMN IF NOT EXISTS repo_type TEXT DEFAULT 'remote'; + ALTER TABLE remotes ADD COLUMN IF NOT EXISTS upstream_dial_timeout INTEGER DEFAULT 0; + ALTER TABLE remotes ADD COLUMN IF NOT EXISTS upstream_tls_timeout INTEGER DEFAULT 0; + ALTER TABLE remotes ADD COLUMN IF NOT EXISTS upstream_response_header_timeout INTEGER DEFAULT 0; CREATE TABLE IF NOT EXISTS rpm_metadata ( id BIGSERIAL PRIMARY KEY, diff --git a/internal/database/remotes.go b/internal/database/remotes.go index 8cca8ba..a7d2b62 100644 --- a/internal/database/remotes.go +++ b/internal/database/remotes.go @@ -11,7 +11,9 @@ const remoteCols = `name, package_type, repo_type, base_url, description, userna patterns, blocklist, mutable_patterns, immutable_patterns, ban_tags_enabled, ban_tags, quarantine_enabled, quarantine_days, stale_on_error, - releases_remote, managed_by, created_at, updated_at` + releases_remote, managed_by, + upstream_dial_timeout, upstream_tls_timeout, upstream_response_header_timeout, + created_at, updated_at` func scanRemote(scanner interface{ Scan(...any) error }, r *models.Remote) error { return scanner.Scan( @@ -20,7 +22,9 @@ func scanRemote(scanner interface{ Scan(...any) error }, r *models.Remote) error &r.Patterns, &r.Blocklist, &r.MutablePatterns, &r.ImmutablePatterns, &r.BanTagsEnabled, &r.BanTags, &r.QuarantineEnabled, &r.QuarantineDays, &r.StaleOnError, - &r.ReleasesRemote, &r.ManagedBy, &r.CreatedAt, &r.UpdatedAt, + &r.ReleasesRemote, &r.ManagedBy, + &r.UpstreamDialTimeout, &r.UpstreamTLSTimeout, &r.UpstreamResponseHeaderTimeout, + &r.CreatedAt, &r.UpdatedAt, ) } @@ -59,8 +63,9 @@ func (db *DB) CreateRemote(ctx context.Context, r *models.Remote) error { patterns, blocklist, mutable_patterns, immutable_patterns, ban_tags_enabled, ban_tags, quarantine_enabled, quarantine_days, stale_on_error, - releases_remote, managed_by - ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21) + releases_remote, managed_by, + upstream_dial_timeout, upstream_tls_timeout, upstream_response_header_timeout + ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23,$24) `, r.Name, r.PackageType, r.RepoType, r.BaseURL, r.Description, r.Username, r.Password, r.ImmutableTTL, r.MutableTTL, r.CheckMutable, @@ -68,6 +73,7 @@ func (db *DB) CreateRemote(ctx context.Context, r *models.Remote) error { r.BanTagsEnabled, r.BanTags, r.QuarantineEnabled, r.QuarantineDays, r.StaleOnError, r.ReleasesRemote, r.ManagedBy, + r.UpstreamDialTimeout, r.UpstreamTLSTimeout, r.UpstreamResponseHeaderTimeout, ) return err } @@ -80,7 +86,9 @@ func (db *DB) UpdateRemote(ctx context.Context, r *models.Remote) error { patterns=$11, blocklist=$12, mutable_patterns=$13, immutable_patterns=$14, ban_tags_enabled=$15, ban_tags=$16, quarantine_enabled=$17, quarantine_days=$18, stale_on_error=$19, - releases_remote=$20, managed_by=$21, updated_at=NOW() + releases_remote=$20, managed_by=$21, + upstream_dial_timeout=$22, upstream_tls_timeout=$23, upstream_response_header_timeout=$24, + updated_at=NOW() WHERE name=$1 `, r.Name, r.PackageType, r.RepoType, r.BaseURL, r.Description, r.Username, r.Password, @@ -89,6 +97,7 @@ func (db *DB) UpdateRemote(ctx context.Context, r *models.Remote) error { r.BanTagsEnabled, r.BanTags, r.QuarantineEnabled, r.QuarantineDays, r.StaleOnError, r.ReleasesRemote, r.ManagedBy, + r.UpstreamDialTimeout, r.UpstreamTLSTimeout, r.UpstreamResponseHeaderTimeout, ) return err } diff --git a/internal/proxy/engine.go b/internal/proxy/engine.go index 47ecafc..7774c62 100644 --- a/internal/proxy/engine.go +++ b/internal/proxy/engine.go @@ -279,7 +279,7 @@ func (e *Engine) fetchFromUpstream(ctx context.Context, remote models.Remote, pa } } - resp, err := http.DefaultClient.Do(req) + resp, err := clientForRemote(remote).Do(req) if err != nil { return nil, &UpstreamError{Err: err} } @@ -295,7 +295,7 @@ func (e *Engine) fetchFromUpstream(ctx context.Context, remote models.Remote, pa req2.Header.Set("Accept", accept) } } - resp, err = http.DefaultClient.Do(req2) + resp, err = clientForRemote(remote).Do(req2) if err != nil { return nil, &UpstreamError{Err: err} } @@ -452,7 +452,7 @@ func (e *Engine) checkUpstream(ctx context.Context, remote models.Remote, path, } } - resp, err := http.DefaultClient.Do(req) + resp, err := clientForRemote(remote).Do(req) if err != nil { return false, &UpstreamError{Err: err} } @@ -579,7 +579,7 @@ func fetchBearerToken(ctx context.Context, wwwAuth string, remote models.Remote) req.SetBasicAuth(remote.Username, remote.Password) } - resp, err := http.DefaultClient.Do(req) + resp, err := clientForRemote(remote).Do(req) if err != nil { return "", 0, err } diff --git a/internal/proxy/httpclient.go b/internal/proxy/httpclient.go new file mode 100644 index 0000000..e652851 --- /dev/null +++ b/internal/proxy/httpclient.go @@ -0,0 +1,83 @@ +package proxy + +import ( + "net" + "net/http" + "sync" + "time" + + "git.unkin.net/unkin/artifactapi/pkg/models" +) + +// Default upstream timeouts. A remote may override any of these; a zero +// override falls back to the default here. There is deliberately no overall +// Client.Timeout: the proxy streams arbitrarily large artifacts and total time +// is bounded by the request context instead. We only constrain the phases that +// must never hang — connect, TLS handshake, and time-to-first-response-header — +// so a slow or wedged upstream cannot pin a goroutine and connection. +const ( + defaultDialTimeout = 10 * time.Second + defaultTLSTimeout = 10 * time.Second + defaultResponseHeaderTimeout = 30 * time.Second +) + +type clientKey struct { + dial time.Duration + tls time.Duration + respHeader time.Duration +} + +var ( + clientCacheMu sync.Mutex + clientCache = map[clientKey]*http.Client{} +) + +// upstreamClientFor returns an HTTP client configured with the given timeouts, +// reusing a cached client (and its connection pool) for identical timeout sets. +// Zero values fall back to the defaults. +func upstreamClientFor(dial, tls, respHeader time.Duration) *http.Client { + if dial <= 0 { + dial = defaultDialTimeout + } + if tls <= 0 { + tls = defaultTLSTimeout + } + if respHeader <= 0 { + respHeader = defaultResponseHeaderTimeout + } + key := clientKey{dial: dial, tls: tls, respHeader: respHeader} + + clientCacheMu.Lock() + defer clientCacheMu.Unlock() + if c, ok := clientCache[key]; ok { + return c + } + + c := &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: dial, + KeepAlive: 30 * time.Second, + }).DialContext, + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: tls, + ExpectContinueTimeout: 1 * time.Second, + ResponseHeaderTimeout: respHeader, + }, + } + clientCache[key] = c + return c +} + +// clientForRemote returns the upstream client for a remote, applying its +// per-remote timeout overrides (in seconds) on top of the defaults. +func clientForRemote(remote models.Remote) *http.Client { + return upstreamClientFor( + time.Duration(remote.UpstreamDialTimeout)*time.Second, + time.Duration(remote.UpstreamTLSTimeout)*time.Second, + time.Duration(remote.UpstreamResponseHeaderTimeout)*time.Second, + ) +} diff --git a/pkg/models/remote.go b/pkg/models/remote.go index 953336d..5516a25 100644 --- a/pkg/models/remote.go +++ b/pkg/models/remote.go @@ -47,6 +47,11 @@ type Remote struct { MutableTTL int `json:"mutable_ttl"` CheckMutable bool `json:"check_mutable"` + // Upstream HTTP timeouts in seconds. 0 means use the server default. + UpstreamDialTimeout int `json:"upstream_dial_timeout,omitempty"` + UpstreamTLSTimeout int `json:"upstream_tls_timeout,omitempty"` + UpstreamResponseHeaderTimeout int `json:"upstream_response_header_timeout,omitempty"` + Patterns []string `json:"patterns,omitempty"` Blocklist []string `json:"blocklist,omitempty"` MutablePatterns []string `json:"mutable_patterns,omitempty"`