---
# HorizontalPodAutoscaler for the "litellm" Deployment in the "litellm"
# namespace. Scales between 2 and 10 replicas on average CPU utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: litellm-hpa
  namespace: litellm
spec:
  # Workload whose replica count this autoscaler manages.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: litellm

  # Hard bounds on the replica count.
  minReplicas: 2
  maxReplicas: 10

  metrics:
    # Target 60% average CPU utilization across the pods.
    # NOTE(review): Utilization targets are computed relative to container
    # CPU requests -- confirm the litellm Deployment sets them.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 60

  behavior:
    # Scale up with no stabilization window. With selectPolicy "Max" the
    # policy permitting the larger change applies: either +100% of the
    # current replicas or +4 pods per 30-second period.
    scaleUp:
      stabilizationWindowSeconds: 0
      selectPolicy: Max
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 4
          periodSeconds: 30

    # Scale down only after a 300-second stabilization window. With
    # selectPolicy "Min" the policy permitting the smaller change applies:
    # either -10% of the current replicas or -2 pods per 60-second period.
    scaleDown:
      stabilizationWindowSeconds: 300
      selectPolicy: Min
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60