feat: increase litellm resources (#144)

litellm performance has dropped and it has crashed on multiple occasions;
it then scaled out to its maximum level, using the majority of the memory
in the cluster.

- make litellm autoscaling less aggressive (lower replica cap, higher CPU target, slower scale-up)
- increase the resource requests/limits to match actual usage

Reviewed-on: #144
This commit was merged in pull request #144.
This commit is contained in:
2026-05-23 17:59:43 +10:00
parent 445d8b6e7e
commit e05f9bfd83
3 changed files with 9 additions and 9 deletions
+3 -3
View File
@@ -76,11 +76,11 @@ spec:
updateInterval: 30 updateInterval: 30
resources: resources:
limits: limits:
cpu: 500m cpu: 1
memory: 512Mi memory: 1024Mi
requests: requests:
cpu: 250m cpu: 250m
memory: 256Mi memory: 512Mi
smartShutdownTimeout: 180 smartShutdownTimeout: 180
startDelay: 3600 startDelay: 3600
stopDelay: 1800 stopDelay: 1800
+2 -2
View File
@@ -56,10 +56,10 @@ spec:
resources: resources:
limits: limits:
cpu: "2" cpu: "2"
memory: 6Gi memory: 8Gi
requests: requests:
cpu: 250m cpu: 250m
memory: 2Gi memory: 6Gi
volumeMounts: volumeMounts:
- mountPath: /app/config.yaml - mountPath: /app/config.yaml
name: config name: config
+4 -4
View File
@@ -10,14 +10,14 @@ spec:
kind: Deployment kind: Deployment
name: litellm name: litellm
minReplicas: 2 minReplicas: 2
maxReplicas: 10 maxReplicas: 4
metrics: metrics:
- type: Resource - type: Resource
resource: resource:
name: cpu name: cpu
target: target:
type: Utilization type: Utilization
averageUtilization: 60 averageUtilization: 80
behavior: behavior:
scaleUp: scaleUp:
stabilizationWindowSeconds: 0 stabilizationWindowSeconds: 0
@@ -25,7 +25,7 @@ spec:
policies: policies:
- type: Percent - type: Percent
value: 100 value: 100
periodSeconds: 30 periodSeconds: 60
- type: Pods - type: Pods
value: 4 value: 4
periodSeconds: 30 periodSeconds: 30
@@ -34,7 +34,7 @@ spec:
selectPolicy: Min selectPolicy: Min
policies: policies:
- type: Percent - type: Percent
value: 10 value: 30
periodSeconds: 60 periodSeconds: 60
- type: Pods - type: Pods
value: 2 value: 2