feat: increase litellm resources (#144)
We found that litellm performance had dropped: it crashed in multiple cases and then scaled to the maximum level, using the majority of the memory in the cluster. - Reduce the rate at which litellm autoscales - Increase the requests/limits to match usage Reviewed-on: #144
This commit was merged in pull request #144.
This commit is contained in:
@@ -76,11 +76,11 @@ spec:
|
|||||||
updateInterval: 30
|
updateInterval: 30
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
cpu: 500m
|
cpu: 1
|
||||||
memory: 512Mi
|
memory: 1024Mi
|
||||||
requests:
|
requests:
|
||||||
cpu: 250m
|
cpu: 250m
|
||||||
memory: 256Mi
|
memory: 512Mi
|
||||||
smartShutdownTimeout: 180
|
smartShutdownTimeout: 180
|
||||||
startDelay: 3600
|
startDelay: 3600
|
||||||
stopDelay: 1800
|
stopDelay: 1800
|
||||||
|
|||||||
@@ -56,10 +56,10 @@ spec:
|
|||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
cpu: "2"
|
cpu: "2"
|
||||||
memory: 6Gi
|
memory: 8Gi
|
||||||
requests:
|
requests:
|
||||||
cpu: 250m
|
cpu: 250m
|
||||||
memory: 2Gi
|
memory: 6Gi
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /app/config.yaml
|
- mountPath: /app/config.yaml
|
||||||
name: config
|
name: config
|
||||||
|
|||||||
@@ -10,14 +10,14 @@ spec:
|
|||||||
kind: Deployment
|
kind: Deployment
|
||||||
name: litellm
|
name: litellm
|
||||||
minReplicas: 2
|
minReplicas: 2
|
||||||
maxReplicas: 10
|
maxReplicas: 4
|
||||||
metrics:
|
metrics:
|
||||||
- type: Resource
|
- type: Resource
|
||||||
resource:
|
resource:
|
||||||
name: cpu
|
name: cpu
|
||||||
target:
|
target:
|
||||||
type: Utilization
|
type: Utilization
|
||||||
averageUtilization: 60
|
averageUtilization: 80
|
||||||
behavior:
|
behavior:
|
||||||
scaleUp:
|
scaleUp:
|
||||||
stabilizationWindowSeconds: 0
|
stabilizationWindowSeconds: 0
|
||||||
@@ -25,7 +25,7 @@ spec:
|
|||||||
policies:
|
policies:
|
||||||
- type: Percent
|
- type: Percent
|
||||||
value: 100
|
value: 100
|
||||||
periodSeconds: 30
|
periodSeconds: 60
|
||||||
- type: Pods
|
- type: Pods
|
||||||
value: 4
|
value: 4
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
@@ -34,7 +34,7 @@ spec:
|
|||||||
selectPolicy: Min
|
selectPolicy: Min
|
||||||
policies:
|
policies:
|
||||||
- type: Percent
|
- type: Percent
|
||||||
value: 10
|
value: 30
|
||||||
periodSeconds: 60
|
periodSeconds: 60
|
||||||
- type: Pods
|
- type: Pods
|
||||||
value: 2
|
value: 2
|
||||||
|
|||||||
Reference in New Issue
Block a user