From e05f9bfd83fba0dfaa82079746b30bc8bebd5d4b Mon Sep 17 00:00:00 2001 From: Ben Vincent Date: Sat, 23 May 2026 17:59:43 +1000 Subject: [PATCH] feat: increase litellm resources (#144) litellm performance has dropped and it has crashed in multiple cases; it then scaled to its maximum replica count, using the majority of the memory in the cluster. - reduce the rate at which litellm autoscales - increase the requests/limits to match usage Reviewed-on: https://git.unkin.net/unkin/argocd-apps/pulls/144 --- apps/base/litellm/cnpg_cluster.yaml | 6 +++--- apps/base/litellm/deployment.yaml | 4 ++-- apps/base/litellm/hpa.yaml | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/apps/base/litellm/cnpg_cluster.yaml b/apps/base/litellm/cnpg_cluster.yaml index ea95389..5c09d29 100644 --- a/apps/base/litellm/cnpg_cluster.yaml +++ b/apps/base/litellm/cnpg_cluster.yaml @@ -76,11 +76,11 @@ spec: updateInterval: 30 resources: limits: - cpu: 500m - memory: 512Mi + cpu: 1 + memory: 1024Mi requests: cpu: 250m - memory: 256Mi + memory: 512Mi smartShutdownTimeout: 180 startDelay: 3600 stopDelay: 1800 diff --git a/apps/base/litellm/deployment.yaml b/apps/base/litellm/deployment.yaml index 0f3b1fc..6039b10 100644 --- a/apps/base/litellm/deployment.yaml +++ b/apps/base/litellm/deployment.yaml @@ -56,10 +56,10 @@ spec: resources: limits: cpu: "2" - memory: 6Gi + memory: 8Gi requests: cpu: 250m - memory: 2Gi + memory: 6Gi volumeMounts: - mountPath: /app/config.yaml name: config diff --git a/apps/base/litellm/hpa.yaml b/apps/base/litellm/hpa.yaml index b954aa8..746626c 100644 --- a/apps/base/litellm/hpa.yaml +++ b/apps/base/litellm/hpa.yaml @@ -10,14 +10,14 @@ spec: kind: Deployment name: litellm minReplicas: 2 - maxReplicas: 10 + maxReplicas: 4 metrics: - type: Resource resource: name: cpu target: type: Utilization - averageUtilization: 60 + averageUtilization: 80 behavior: scaleUp: stabilizationWindowSeconds: 0 @@ -25,7 +25,7 @@ spec: policies: - type: Percent value: 100 - 
periodSeconds: 30 + periodSeconds: 60 - type: Pods value: 4 periodSeconds: 30 @@ -34,7 +34,7 @@ spec: selectPolicy: Min policies: - type: Percent - value: 10 + value: 30 periodSeconds: 60 - type: Pods value: 2