feat: add litellm to new aitooling ArgoCD project (#94)
Deploys LiteLLM proxy with CNPG PostgreSQL (3-instance HA), PgBouncer pooler, and Redis cache. Introduces a dedicated aitooling AppProject and ApplicationSet to keep AI tooling services separate from platform infra. Reviewed-on: #94
This commit was merged in pull request #94.
This commit is contained in:
@@ -0,0 +1,91 @@
|
||||
---
|
||||
apiVersion: postgresql.cnpg.io/v1
|
||||
kind: Cluster
|
||||
metadata:
|
||||
name: litellm-postgres
|
||||
namespace: litellm
|
||||
spec:
|
||||
affinity:
|
||||
podAntiAffinityType: preferred
|
||||
bootstrap:
|
||||
initdb:
|
||||
database: litellm
|
||||
encoding: UTF8
|
||||
localeCType: C
|
||||
localeCollate: C
|
||||
owner: litellm
|
||||
secret:
|
||||
name: postgres-credentials
|
||||
enablePDB: true
|
||||
enableSuperuserAccess: false
|
||||
failoverDelay: 0
|
||||
imageName: ghcr.io/cloudnative-pg/postgresql:17-minimal-trixie
|
||||
instances: 3
|
||||
logLevel: info
|
||||
maxSyncReplicas: 0
|
||||
minSyncReplicas: 0
|
||||
monitoring:
|
||||
customQueriesConfigMap:
|
||||
- key: queries
|
||||
name: cnpg-default-monitoring
|
||||
disableDefaultQueries: false
|
||||
enablePodMonitor: false
|
||||
postgresql:
|
||||
parameters:
|
||||
archive_mode: "on"
|
||||
archive_timeout: 5min
|
||||
dynamic_shared_memory_type: posix
|
||||
effective_cache_size: 256MB
|
||||
full_page_writes: "on"
|
||||
log_destination: csvlog
|
||||
log_directory: /controller/log
|
||||
log_filename: postgres
|
||||
log_rotation_age: "0"
|
||||
log_rotation_size: "0"
|
||||
log_truncate_on_rotation: "false"
|
||||
logging_collector: "on"
|
||||
max_connections: "200"
|
||||
max_parallel_workers: "16"
|
||||
max_replication_slots: "16"
|
||||
max_worker_processes: "16"
|
||||
shared_buffers: 128MB
|
||||
shared_memory_type: mmap
|
||||
ssl_max_protocol_version: TLSv1.3
|
||||
ssl_min_protocol_version: TLSv1.3
|
||||
wal_keep_size: 256MB
|
||||
wal_level: logical
|
||||
wal_log_hints: "on"
|
||||
wal_receiver_timeout: 5s
|
||||
wal_sender_timeout: 5s
|
||||
syncReplicaElectionConstraint:
|
||||
enabled: false
|
||||
primaryUpdateMethod: restart
|
||||
primaryUpdateStrategy: unsupervised
|
||||
probes:
|
||||
liveness:
|
||||
isolationCheck:
|
||||
connectionTimeout: 1000
|
||||
enabled: true
|
||||
requestTimeout: 1000
|
||||
replicationSlots:
|
||||
highAvailability:
|
||||
enabled: true
|
||||
slotPrefix: _cnpg_
|
||||
synchronizeReplicas:
|
||||
enabled: true
|
||||
updateInterval: 30
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 256Mi
|
||||
smartShutdownTimeout: 180
|
||||
startDelay: 3600
|
||||
stopDelay: 1800
|
||||
storage:
|
||||
resizeInUseVolumes: true
|
||||
size: 10Gi
|
||||
storageClass: cephrbd-fast-delete
|
||||
switchoverDelay: 3600
|
||||
@@ -0,0 +1,33 @@
|
||||
---
|
||||
apiVersion: postgresql.cnpg.io/v1
|
||||
kind: Pooler
|
||||
metadata:
|
||||
name: litellm-postgres-pooler
|
||||
namespace: litellm
|
||||
spec:
|
||||
cluster:
|
||||
name: litellm-postgres
|
||||
instances: 2
|
||||
pgbouncer:
|
||||
parameters:
|
||||
default_pool_size: "100"
|
||||
max_client_conn: "400"
|
||||
paused: false
|
||||
poolMode: session
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: pooler
|
||||
spec:
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
- labelSelector:
|
||||
matchExpressions:
|
||||
- key: app
|
||||
operator: In
|
||||
values:
|
||||
- pooler
|
||||
topologyKey: kubernetes.io/hostname
|
||||
containers: []
|
||||
type: rw
|
||||
@@ -0,0 +1,67 @@
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: litellm
|
||||
namespace: litellm
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: litellm
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: litellm
|
||||
spec:
|
||||
containers:
|
||||
- name: litellm
|
||||
image: docker.litellm.ai/berriai/litellm-database:main-stable
|
||||
imagePullPolicy: Always
|
||||
args:
|
||||
- --config
|
||||
- /app/config.yaml
|
||||
- --port
|
||||
- "4000"
|
||||
- --num_workers
|
||||
- "8"
|
||||
ports:
|
||||
- containerPort: 4000
|
||||
name: http
|
||||
protocol: TCP
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: litellm-credentials
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health/liveliness
|
||||
port: 4000
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health/readiness
|
||||
port: 4000
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
resources:
|
||||
limits:
|
||||
cpu: "1"
|
||||
memory: 2Gi
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 512Mi
|
||||
volumeMounts:
|
||||
- mountPath: /app/config.yaml
|
||||
name: config
|
||||
subPath: config.yaml
|
||||
restartPolicy: Always
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: litellm-config
|
||||
@@ -0,0 +1,41 @@
|
||||
---
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: litellm-hpa
|
||||
namespace: litellm
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: litellm
|
||||
minReplicas: 2
|
||||
maxReplicas: 10
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 60
|
||||
behavior:
|
||||
scaleUp:
|
||||
stabilizationWindowSeconds: 0
|
||||
selectPolicy: Max
|
||||
policies:
|
||||
- type: Percent
|
||||
value: 100
|
||||
periodSeconds: 30
|
||||
- type: Pods
|
||||
value: 4
|
||||
periodSeconds: 30
|
||||
scaleDown:
|
||||
stabilizationWindowSeconds: 300
|
||||
selectPolicy: Min
|
||||
policies:
|
||||
- type: Percent
|
||||
value: 10
|
||||
periodSeconds: 60
|
||||
- type: Pods
|
||||
value: 2
|
||||
periodSeconds: 60
|
||||
@@ -0,0 +1,29 @@
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx
|
||||
external-dns.alpha.kubernetes.io/hostname: litellm.k8s.syd1.au.unkin.net
|
||||
external-dns.alpha.kubernetes.io/target: 198.18.200.0
|
||||
cert-manager.io/cluster-issuer: vault-issuer
|
||||
cert-manager.io/common-name: litellm.k8s.syd1.au.unkin.net
|
||||
cert-manager.io/private-key-size: "4096"
|
||||
name: litellm
|
||||
namespace: litellm
|
||||
spec:
|
||||
rules:
|
||||
- host: litellm.k8s.syd1.au.unkin.net
|
||||
http:
|
||||
paths:
|
||||
- backend:
|
||||
service:
|
||||
name: litellm
|
||||
port:
|
||||
number: 4000
|
||||
path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- litellm.k8s.syd1.au.unkin.net
|
||||
secretName: litellm-tls
|
||||
@@ -0,0 +1,23 @@
|
||||
---
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- cnpg_cluster.yaml
|
||||
- cnpg_pooler.yaml
|
||||
- deployment.yaml
|
||||
- hpa.yaml
|
||||
- ingress.yaml
|
||||
- namespace.yaml
|
||||
- redis-deployment.yaml
|
||||
- redis-pvc.yaml
|
||||
- services.yaml
|
||||
- vaultauth.yaml
|
||||
- vaultstaticsecret.yaml
|
||||
|
||||
configMapGenerator:
|
||||
- name: litellm-config
|
||||
files:
|
||||
- config.yaml=resources/config.yaml
|
||||
options:
|
||||
disableNameSuffixHash: true
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: litellm
|
||||
@@ -0,0 +1,67 @@
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: redis
|
||||
namespace: litellm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: redis
|
||||
strategy:
|
||||
type: Recreate
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: redis
|
||||
spec:
|
||||
containers:
|
||||
- name: redis
|
||||
image: redis:7-alpine
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
- redis-server
|
||||
- --save
|
||||
- "20"
|
||||
- "1"
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
name: redis
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
exec:
|
||||
command:
|
||||
- redis-cli
|
||||
- ping
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
exec:
|
||||
command:
|
||||
- redis-cli
|
||||
- ping
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 128Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
mountPropagation: None
|
||||
name: data
|
||||
restartPolicy: Always
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: litellm-redis-data
|
||||
@@ -0,0 +1,14 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: litellm-redis-data
|
||||
namespace: litellm
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
storageClassName: cephrbd-fast-delete
|
||||
volumeMode: Filesystem
|
||||
@@ -0,0 +1,15 @@
|
||||
model_list: []
|
||||
|
||||
router_settings:
|
||||
redis_host: redis-service
|
||||
redis_port: 6379
|
||||
|
||||
general_settings:
|
||||
use_redis_transaction_buffer: true
|
||||
|
||||
litellm_settings:
|
||||
cache: true
|
||||
cache_params:
|
||||
type: redis
|
||||
host: redis-service
|
||||
port: 6379
|
||||
@@ -0,0 +1,34 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: litellm
|
||||
namespace: litellm
|
||||
spec:
|
||||
internalTrafficPolicy: Cluster
|
||||
ports:
|
||||
- name: http
|
||||
port: 4000
|
||||
protocol: TCP
|
||||
targetPort: http
|
||||
selector:
|
||||
app: litellm
|
||||
sessionAffinity: None
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: redis-service
|
||||
namespace: litellm
|
||||
spec:
|
||||
internalTrafficPolicy: Cluster
|
||||
ports:
|
||||
- name: redis
|
||||
port: 6379
|
||||
protocol: TCP
|
||||
targetPort: redis
|
||||
selector:
|
||||
app: redis
|
||||
sessionAffinity: None
|
||||
type: ClusterIP
|
||||
@@ -0,0 +1,18 @@
|
||||
---
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultAuth
|
||||
metadata:
|
||||
name: default
|
||||
namespace: litellm
|
||||
spec:
|
||||
allowedNamespaces:
|
||||
- litellm
|
||||
kubernetes:
|
||||
audiences:
|
||||
- vault
|
||||
role: default
|
||||
serviceAccount: default
|
||||
tokenExpirationSeconds: 600
|
||||
method: kubernetes
|
||||
mount: k8s/au/syd1
|
||||
vaultConnectionRef: vso-system/default
|
||||
@@ -0,0 +1,34 @@
|
||||
---
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultStaticSecret
|
||||
metadata:
|
||||
name: postgres-credentials
|
||||
namespace: litellm
|
||||
spec:
|
||||
destination:
|
||||
create: true
|
||||
name: postgres-credentials
|
||||
overwrite: true
|
||||
hmacSecretData: true
|
||||
mount: kv
|
||||
path: kubernetes/namespace/litellm/default/postgres-credentials
|
||||
refreshAfter: 5m
|
||||
type: kv-v2
|
||||
vaultAuthRef: default
|
||||
---
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultStaticSecret
|
||||
metadata:
|
||||
name: litellm-credentials
|
||||
namespace: litellm
|
||||
spec:
|
||||
destination:
|
||||
create: true
|
||||
name: litellm-credentials
|
||||
overwrite: true
|
||||
hmacSecretData: true
|
||||
mount: kv
|
||||
path: kubernetes/namespace/litellm/default/litellm-credentials
|
||||
refreshAfter: 5m
|
||||
type: kv-v2
|
||||
vaultAuthRef: default
|
||||
@@ -0,0 +1,6 @@
|
||||
---
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- ../../../base/litellm
|
||||
Reference in New Issue
Block a user