feat: add litellm to new aitooling ArgoCD project #94

Merged
unkinben merged 1 commits from benvin/litellm into main 2026-05-01 21:40:26 +10:00
18 changed files with 529 additions and 0 deletions
Showing only changes of commit aa00ed988a - Show all commits
+91
View File
@@ -0,0 +1,91 @@
---
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: litellm-postgres
namespace: litellm
spec:
affinity:
podAntiAffinityType: preferred
bootstrap:
initdb:
database: litellm
encoding: UTF8
localeCType: C
localeCollate: C
owner: litellm
secret:
name: postgres-credentials
enablePDB: true
enableSuperuserAccess: false
failoverDelay: 0
imageName: ghcr.io/cloudnative-pg/postgresql:17-minimal-trixie
instances: 3
logLevel: info
maxSyncReplicas: 0
minSyncReplicas: 0
monitoring:
customQueriesConfigMap:
- key: queries
name: cnpg-default-monitoring
disableDefaultQueries: false
enablePodMonitor: false
postgresql:
parameters:
archive_mode: "on"
archive_timeout: 5min
dynamic_shared_memory_type: posix
effective_cache_size: 256MB
full_page_writes: "on"
log_destination: csvlog
log_directory: /controller/log
log_filename: postgres
log_rotation_age: "0"
log_rotation_size: "0"
log_truncate_on_rotation: "false"
logging_collector: "on"
max_connections: "200"
max_parallel_workers: "16"
max_replication_slots: "16"
max_worker_processes: "16"
shared_buffers: 128MB
shared_memory_type: mmap
ssl_max_protocol_version: TLSv1.3
ssl_min_protocol_version: TLSv1.3
wal_keep_size: 256MB
wal_level: logical
wal_log_hints: "on"
wal_receiver_timeout: 5s
wal_sender_timeout: 5s
syncReplicaElectionConstraint:
enabled: false
primaryUpdateMethod: restart
primaryUpdateStrategy: unsupervised
probes:
liveness:
isolationCheck:
connectionTimeout: 1000
enabled: true
requestTimeout: 1000
replicationSlots:
highAvailability:
enabled: true
slotPrefix: _cnpg_
synchronizeReplicas:
enabled: true
updateInterval: 30
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 250m
memory: 256Mi
smartShutdownTimeout: 180
startDelay: 3600
stopDelay: 1800
storage:
resizeInUseVolumes: true
size: 10Gi
storageClass: cephrbd-fast-delete
switchoverDelay: 3600
+33
View File
@@ -0,0 +1,33 @@
---
apiVersion: postgresql.cnpg.io/v1
kind: Pooler
metadata:
name: litellm-postgres-pooler
namespace: litellm
spec:
cluster:
name: litellm-postgres
instances: 2
pgbouncer:
parameters:
default_pool_size: "100"
max_client_conn: "400"
paused: false
poolMode: session
template:
metadata:
labels:
app: pooler
spec:
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- pooler
topologyKey: kubernetes.io/hostname
containers: []
type: rw
+67
View File
@@ -0,0 +1,67 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: litellm
namespace: litellm
spec:
selector:
matchLabels:
app: litellm
template:
metadata:
labels:
app: litellm
spec:
containers:
- name: litellm
image: docker.litellm.ai/berriai/litellm-database:main-stable
imagePullPolicy: Always
args:
- --config
- /app/config.yaml
- --port
- "4000"
- --num_workers
- "8"
ports:
- containerPort: 4000
name: http
protocol: TCP
envFrom:
- secretRef:
name: litellm-credentials
livenessProbe:
httpGet:
path: /health/liveliness
port: 4000
failureThreshold: 3
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 5
readinessProbe:
httpGet:
path: /health/readiness
port: 4000
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
resources:
limits:
cpu: "1"
memory: 2Gi
requests:
cpu: 250m
memory: 512Mi
volumeMounts:
- mountPath: /app/config.yaml
name: config
subPath: config.yaml
restartPolicy: Always
volumes:
- name: config
configMap:
name: litellm-config
+41
View File
@@ -0,0 +1,41 @@
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: litellm-hpa
namespace: litellm
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: litellm
minReplicas: 2
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 60
behavior:
scaleUp:
stabilizationWindowSeconds: 0
selectPolicy: Max
policies:
- type: Percent
value: 100
periodSeconds: 30
- type: Pods
value: 4
periodSeconds: 30
scaleDown:
stabilizationWindowSeconds: 300
selectPolicy: Min
policies:
- type: Percent
value: 10
periodSeconds: 60
- type: Pods
value: 2
periodSeconds: 60
+29
View File
@@ -0,0 +1,29 @@
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
annotations:
kubernetes.io/ingress.class: nginx
external-dns.alpha.kubernetes.io/hostname: litellm.k8s.syd1.au.unkin.net
external-dns.alpha.kubernetes.io/target: 198.18.200.0
cert-manager.io/cluster-issuer: vault-issuer
cert-manager.io/common-name: litellm.k8s.syd1.au.unkin.net
cert-manager.io/private-key-size: "4096"
name: litellm
namespace: litellm
spec:
rules:
- host: litellm.k8s.syd1.au.unkin.net
http:
paths:
- backend:
service:
name: litellm
port:
number: 4000
path: /
pathType: Prefix
tls:
- hosts:
- litellm.k8s.syd1.au.unkin.net
secretName: litellm-tls
+23
View File
@@ -0,0 +1,23 @@
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- cnpg_cluster.yaml
- cnpg_pooler.yaml
- deployment.yaml
- hpa.yaml
- ingress.yaml
- namespace.yaml
- redis-deployment.yaml
- redis-pvc.yaml
- services.yaml
- vaultauth.yaml
- vaultstaticsecret.yaml
configMapGenerator:
- name: litellm-config
files:
- config.yaml=resources/config.yaml
options:
disableNameSuffixHash: true
+5
View File
@@ -0,0 +1,5 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: litellm
+67
View File
@@ -0,0 +1,67 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: redis
namespace: litellm
spec:
replicas: 1
selector:
matchLabels:
app: redis
strategy:
type: Recreate
template:
metadata:
labels:
app: redis
spec:
containers:
- name: redis
image: redis:7-alpine
imagePullPolicy: IfNotPresent
command:
- redis-server
- --save
- "20"
- "1"
ports:
- containerPort: 6379
name: redis
protocol: TCP
livenessProbe:
exec:
command:
- redis-cli
- ping
failureThreshold: 3
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 5
readinessProbe:
exec:
command:
- redis-cli
- ping
failureThreshold: 3
initialDelaySeconds: 5
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 50m
memory: 128Mi
volumeMounts:
- mountPath: /data
mountPropagation: None
name: data
restartPolicy: Always
volumes:
- name: data
persistentVolumeClaim:
claimName: litellm-redis-data
+14
View File
@@ -0,0 +1,14 @@
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: litellm-redis-data
namespace: litellm
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
storageClassName: cephrbd-fast-delete
volumeMode: Filesystem
+15
View File
@@ -0,0 +1,15 @@
model_list: []
router_settings:
redis_host: redis-service
redis_port: 6379
general_settings:
use_redis_transaction_buffer: true
litellm_settings:
cache: true
cache_params:
type: redis
host: redis-service
port: 6379
+34
View File
@@ -0,0 +1,34 @@
---
apiVersion: v1
kind: Service
metadata:
name: litellm
namespace: litellm
spec:
internalTrafficPolicy: Cluster
ports:
- name: http
port: 4000
protocol: TCP
targetPort: http
selector:
app: litellm
sessionAffinity: None
type: ClusterIP
---
apiVersion: v1
kind: Service
metadata:
name: redis-service
namespace: litellm
spec:
internalTrafficPolicy: Cluster
ports:
- name: redis
port: 6379
protocol: TCP
targetPort: redis
selector:
app: redis
sessionAffinity: None
type: ClusterIP
+18
View File
@@ -0,0 +1,18 @@
---
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultAuth
metadata:
name: default
namespace: litellm
spec:
allowedNamespaces:
- litellm
kubernetes:
audiences:
- vault
role: default
serviceAccount: default
tokenExpirationSeconds: 600
method: kubernetes
mount: k8s/au/syd1
vaultConnectionRef: vso-system/default
+34
View File
@@ -0,0 +1,34 @@
---
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
name: postgres-credentials
namespace: litellm
spec:
destination:
create: true
name: postgres-credentials
overwrite: true
hmacSecretData: true
mount: kv
path: kubernetes/namespace/litellm/default/postgres-credentials
refreshAfter: 5m
type: kv-v2
vaultAuthRef: default
---
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
name: litellm-credentials
namespace: litellm
spec:
destination:
create: true
name: litellm-credentials
overwrite: true
hmacSecretData: true
mount: kv
path: kubernetes/namespace/litellm/default/litellm-credentials
refreshAfter: 5m
type: kv-v2
vaultAuthRef: default
@@ -0,0 +1,6 @@
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../../base/litellm
+31
View File
@@ -0,0 +1,31 @@
---
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
name: aitooling-apps
namespace: argocd
spec:
generators:
- git:
repoURL: https://git.unkin.net/unkin/argocd-apps
revision: HEAD
directories:
- path: apps/overlays/*/litellm
template:
metadata:
name: 'aitooling-{{path[3]}}'
spec:
project: aitooling
source:
repoURL: https://git.unkin.net/unkin/argocd-apps
targetRevision: HEAD
path: '{{path}}'
destination:
server: https://kubernetes.default.svc
namespace: '{{path[3]}}'
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- ServerSideApply=true
@@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- aitooling.yaml
- observability.yaml
- platform.yaml
- storage.yaml
+19
View File
@@ -0,0 +1,19 @@
---
apiVersion: argoproj.io/v1alpha1
kind: AppProject
metadata:
name: aitooling
namespace: argocd
spec:
description: AI tooling services
sourceRepos:
- https://git.unkin.net/unkin/argocd-apps
destinations:
- namespace: 'litellm'
server: https://kubernetes.default.svc
clusterResourceWhitelist:
- group: ''
kind: Namespace
namespaceResourceWhitelist:
- group: '*'
kind: '*'
+1
View File
@@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- aitooling.yaml
- observability.yaml
- platform.yaml
- storage.yaml