fix: poll the store on lock miss to coalesce cache-miss stampedes
On a fetch-lock miss the engine slept a flat 500ms once and, on a store miss, fell through to fetch upstream unlocked anyway, so a cold-cache stampede hit upstream once per waiter. Poll the store for up to 5s (100ms interval, context-aware) so waiters pick up the leader's result instead of duplicating the upstream fetch. Refs #75
This commit is contained in:
@@ -75,9 +75,10 @@ func (e *Engine) Fetch(ctx context.Context, remote models.Remote, path string, p
|
||||
}
|
||||
|
||||
if !locked {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
result, err := e.serveFromStore(ctx, remote, path)
|
||||
if err == nil {
|
||||
// Another request holds the fetch lock. Poll the store until the leader
|
||||
// populates it rather than immediately racing to fetch upstream too; a
|
||||
// cold-cache stampede otherwise hits upstream once per waiter.
|
||||
if result := e.waitForStore(ctx, remote, path); result != nil {
|
||||
result.Source = "cache"
|
||||
go e.logAccess(remote.Name, path, true, result.Size, 0)
|
||||
return result, nil
|
||||
@@ -247,6 +248,31 @@ func (e *Engine) fetchFromUpstream(ctx context.Context, remote models.Remote, pa
|
||||
}, nil
|
||||
}
|
||||
|
||||
// waitForStore polls the store for an artifact populated by the request that
|
||||
// holds the fetch lock, returning it once available or nil if it does not
|
||||
// appear within the wait budget (after which the caller fetches upstream
|
||||
// itself). It stops early if the request context is cancelled.
|
||||
func (e *Engine) waitForStore(ctx context.Context, remote models.Remote, path string) *FetchResult {
|
||||
const (
|
||||
pollInterval = 100 * time.Millisecond
|
||||
maxWait = 5 * time.Second
|
||||
)
|
||||
deadline := time.Now().Add(maxWait)
|
||||
for {
|
||||
if result, err := e.serveFromStore(ctx, remote, path); err == nil {
|
||||
return result
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return nil
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil
|
||||
case <-time.After(pollInterval):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) serveFromStore(ctx context.Context, remote models.Remote, path string) (*FetchResult, error) {
|
||||
artifact, err := e.db.GetArtifact(ctx, remote.Name, path)
|
||||
if err == nil && artifact != nil {
|
||||
|
||||
Reference in New Issue
Block a user