e05f9bfd83
Finding: LiteLLM performance dropped and it crashed in multiple cases, after which it scaled to the maximum level, using the majority of the memory in the cluster. Changes: (1) reduce the rate at which LiteLLM autoscales; (2) increase the requests/limits to match usage. Reviewed-on: #144
72 lines
1.7 KiB
YAML
72 lines
1.7 KiB
YAML
---
# Deployment of the LiteLLM container in the `litellm` namespace.
# NOTE(review): `replicas` is not set here; presumably the replica count is
# managed by an autoscaler defined elsewhere -- confirm.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm
  namespace: litellm
spec:
  selector:
    matchLabels:
      app: litellm
  template:
    metadata:
      annotations:
        # Annotation values must be strings, hence the quoted "true".
        # NOTE(review): presumably consumed by Stakater Reloader to restart
        # pods when the referenced ConfigMaps/Secret change -- confirm.
        reloader.stakater.com/auto: "true"
      labels:
        # Must match spec.selector.matchLabels above.
        app: litellm
    spec:
      containers:
        - name: litellm
          image: docker.litellm.ai/berriai/litellm-database:main-stable
          imagePullPolicy: Always
          # Args are strings; numeric values stay quoted so they are not
          # parsed as integers.
          args:
            - --config
            - /app/config.yaml
            - --port
            - "4000"
            - --num_workers
            - "8"
          ports:
            # Same port as the `--port` argument and both probes.
            - containerPort: 4000
              name: http
              protocol: TCP
          envFrom:
            - secretRef:
                name: litellm-credentials
            - configMapRef:
                name: litellm-env
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: 4000
            failureThreshold: 3
            initialDelaySeconds: 30
            periodSeconds: 30
            successThreshold: 1
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: 4000
            failureThreshold: 3
            initialDelaySeconds: 10
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            limits:
              cpu: "2"
              memory: 8Gi
            requests:
              cpu: 250m
              memory: 6Gi
          volumeMounts:
            # Mounts only the `config.yaml` key of the `config` volume at the
            # path passed to `--config`.
            - mountPath: /app/config.yaml
              name: config
              subPath: config.yaml
      restartPolicy: Always
      volumes:
        - name: config
          configMap:
            name: litellm-config