2026-05-01 21:40:26 +10:00
18 changed files with 529 additions and 0 deletions
@@ -0,0 +1,91 @@
+---
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster  # CloudNativePG Postgres cluster backing the litellm database
+metadata:
+  name: litellm-postgres
+  namespace: litellm
+spec:
+  imageName: ghcr.io/cloudnative-pg/postgresql:17-minimal-trixie
+  instances: 3
+  logLevel: info
+  enablePDB: true
+  enableSuperuserAccess: false
+  affinity:
+    podAntiAffinityType: preferred
+  resources:
+    requests:
+      cpu: 250m
+      memory: 256Mi
+    limits:
+      cpu: 500m
+      memory: 512Mi
+  storage:
+    storageClass: cephrbd-fast-delete
+    size: 10Gi
+    resizeInUseVolumes: true
+  bootstrap:
+    initdb:
+      database: litellm
+      owner: litellm
+      secret:
+        name: postgres-credentials  # synced by the VaultStaticSecret of the same name
+      encoding: UTF8
+      localeCollate: C
+      localeCType: C
+  postgresql:
+    parameters:
+      max_connections: '200'  # connection and memory sizing
+      shared_buffers: 128MB
+      effective_cache_size: 256MB
+      shared_memory_type: mmap
+      dynamic_shared_memory_type: posix
+      max_worker_processes: '16'  # worker and replication-slot ceilings
+      max_parallel_workers: '16'
+      max_replication_slots: '16'
+      wal_level: logical  # WAL, archiving and replication
+      wal_log_hints: 'on'
+      wal_keep_size: 256MB
+      wal_sender_timeout: 5s
+      wal_receiver_timeout: 5s
+      full_page_writes: 'on'
+      archive_mode: 'on'
+      archive_timeout: 5min
+      ssl_min_protocol_version: TLSv1.3  # TLS 1.3 only (min == max)
+      ssl_max_protocol_version: TLSv1.3
+      logging_collector: 'on'  # CSV logging under /controller/log; age/size rotation set to 0
+      log_destination: csvlog
+      log_directory: /controller/log
+      log_filename: postgres
+      log_rotation_age: '0'
+      log_rotation_size: '0'
+      log_truncate_on_rotation: 'false'
+    syncReplicaElectionConstraint:
+      enabled: false
+  replicationSlots:
+    updateInterval: 30
+    highAvailability:
+      enabled: true
+      slotPrefix: _cnpg_
+    synchronizeReplicas:
+      enabled: true
+  minSyncReplicas: 0  # both sync-replica bounds are zero
+  maxSyncReplicas: 0
+  primaryUpdateStrategy: unsupervised
+  primaryUpdateMethod: restart
+  failoverDelay: 0
+  switchoverDelay: 3600
+  startDelay: 3600
+  stopDelay: 1800
+  smartShutdownTimeout: 180
+  probes:
+    liveness:
+      isolationCheck:
+        enabled: true
+        connectionTimeout: 1000
+        requestTimeout: 1000
+  monitoring:
+    enablePodMonitor: false
+    disableDefaultQueries: false
+    customQueriesConfigMap:
+      - name: cnpg-default-monitoring
+        key: queries
@@ -0,0 +1,33 @@
+---
+apiVersion: postgresql.cnpg.io/v1
+kind: Pooler  # PgBouncer pooler in front of the litellm-postgres cluster
+metadata:
+  name: litellm-postgres-pooler
+  namespace: litellm
+spec:
+  cluster:
+    name: litellm-postgres
+  instances: 2
+  pgbouncer:
+    parameters:
+      default_pool_size: "90"  # per user/db pair; x2 instances = 180, headroom under max_connections=200
+      max_client_conn: "400"
+    paused: false
+    poolMode: session  # a server connection stays bound to its client until disconnect
+  template:
+    metadata:
+      labels:
+        app: pooler
+    spec:
+      affinity:
+        podAntiAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:  # hard rule: one pooler pod per node
+            - labelSelector:
+                matchExpressions:
+                  - key: app
+                    operator: In
+                    values:
+                      - pooler
+              topologyKey: kubernetes.io/hostname
+      containers: []  # NOTE(review): explicit empty list; presumably required by the template schema
+  type: rw
@@ -0,0 +1,67 @@
+---
+apiVersion: apps/v1
+kind: Deployment  # LiteLLM proxy; replicas is left unset and litellm-hpa targets this Deployment
+metadata:
+  name: litellm
+  namespace: litellm
+spec:
+  selector:
+    matchLabels:
+      app: litellm
+  template:
+    metadata:
+      labels:
+        app: litellm
+    spec:
+      containers:
+        - name: litellm
+          image: docker.litellm.ai/berriai/litellm-database:main-stable
+          imagePullPolicy: Always
+          args:
+            - --config
+            - /app/config.yaml
+            - --port
+            - "4000"
+            - --num_workers  # one worker per pod to fit the 1-CPU limit; litellm-hpa scales pods out
+            - "1"
+          ports:
+            - containerPort: 4000
+              name: http
+              protocol: TCP
+          envFrom:
+            - secretRef:
+                name: litellm-credentials  # Secret synced by the VaultStaticSecret of the same name
+          livenessProbe:
+            httpGet:
+              path: /health/liveliness  # NOTE(review): spelling presumably matches LiteLLM's route
+              port: 4000
+            failureThreshold: 3
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            successThreshold: 1
+            timeoutSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: /health/readiness
+              port: 4000
+            failureThreshold: 3
+            initialDelaySeconds: 10
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 5
+          resources:
+            limits:
+              cpu: "1"
+              memory: 2Gi
+            requests:
+              cpu: 250m
+              memory: 512Mi
+          volumeMounts:
+            - mountPath: /app/config.yaml
+              name: config
+              subPath: config.yaml  # single-file mount; subPath mounts do not follow ConfigMap updates
+      restartPolicy: Always
+      volumes:
+        - name: config
+          configMap:
+            name: litellm-config
@@ -0,0 +1,41 @@
+---
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler  # CPU-based autoscaling for the litellm Deployment
+metadata:
+  name: litellm-hpa
+  namespace: litellm
+spec:
+  minReplicas: 2
+  maxReplicas: 10
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: litellm
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          averageUtilization: 60  # percent of the pods' CPU request
+          type: Utilization
+  behavior:
+    scaleDown:
+      policies:
+        - periodSeconds: 60
+          type: Percent
+          value: 10
+        - periodSeconds: 60
+          type: Pods
+          value: 2
+      selectPolicy: Min  # use whichever policy removes fewer pods
+      stabilizationWindowSeconds: 300
+    scaleUp:
+      policies:
+        - periodSeconds: 30
+          type: Percent
+          value: 100
+        - periodSeconds: 30
+          type: Pods
+          value: 4
+      selectPolicy: Max  # use whichever policy adds more pods
+      stabilizationWindowSeconds: 0
@@ -0,0 +1,29 @@
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # routes litellm.k8s.syd1.au.unkin.net to the litellm Service over TLS
+metadata:
+  name: litellm
+  namespace: litellm
+  annotations:
+    kubernetes.io/ingress.class: nginx  # NOTE(review): deprecated in favour of spec.ingressClassName
+    cert-manager.io/cluster-issuer: vault-issuer
+    cert-manager.io/common-name: litellm.k8s.syd1.au.unkin.net
+    cert-manager.io/private-key-size: '4096'
+    external-dns.alpha.kubernetes.io/hostname: litellm.k8s.syd1.au.unkin.net
+    external-dns.alpha.kubernetes.io/target: 198.18.200.0
+spec:
+  tls:
+    - secretName: litellm-tls
+      hosts:
+        - litellm.k8s.syd1.au.unkin.net
+  rules:
+    - host: litellm.k8s.syd1.au.unkin.net
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: litellm
+                port:
+                  number: 4000
@@ -0,0 +1,23 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization  # base manifests for the litellm stack
+
+resources:
+  - cnpg_cluster.yaml
+  - cnpg_pooler.yaml
+  - deployment.yaml
+  - hpa.yaml
+  - ingress.yaml
+  - namespace.yaml
+  - redis-deployment.yaml
+  - redis-pvc.yaml
+  - services.yaml
+  - vaultauth.yaml
+  - vaultstaticsecret.yaml
+
+configMapGenerator:
+  # Name hash left enabled: the litellm Deployment mounts config.yaml via subPath, which does
+  # not pick up in-place ConfigMap updates, so a renamed ConfigMap is what rolls the pods.
+  - name: litellm-config
+    files:
+      - config.yaml=resources/config.yaml
@@ -0,0 +1,5 @@
+---
+apiVersion: v1
+kind: Namespace  # namespace used by the litellm workloads (proxy, Postgres, Redis)
+metadata:
+  name: litellm
@@ -0,0 +1,67 @@
+---
+apiVersion: apps/v1
+kind: Deployment  # single-replica Redis storing /data on the litellm-redis-data claim
+metadata:
+  name: redis
+  namespace: litellm
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate  # the old pod is removed before its replacement starts
+  selector:
+    matchLabels:
+      app: redis
+  template:
+    metadata:
+      labels:
+        app: redis
+    spec:
+      restartPolicy: Always
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: litellm-redis-data
+      containers:
+        - name: redis
+          image: redis:7-alpine
+          imagePullPolicy: IfNotPresent
+          command:
+            - redis-server
+            - --save  # snapshot rule: after 20 seconds if at least 1 key changed
+            - '20'
+            - '1'
+          ports:
+            - name: redis
+              containerPort: 6379
+              protocol: TCP
+          resources:
+            requests:
+              cpu: 50m
+              memory: 128Mi
+            limits:
+              cpu: 500m
+              memory: 512Mi
+          volumeMounts:
+            - name: data
+              mountPath: /data
+              mountPropagation: None
+          readinessProbe:
+            exec:
+              command:
+                - redis-cli
+                - ping
+            initialDelaySeconds: 5
+            periodSeconds: 10
+            timeoutSeconds: 5
+            successThreshold: 1
+            failureThreshold: 3
+          livenessProbe:
+            exec:
+              command:
+                - redis-cli
+                - ping
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 5
+            successThreshold: 1
+            failureThreshold: 3
@@ -0,0 +1,14 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim  # volume claim mounted at /data by the redis Deployment
+metadata:
+  name: litellm-redis-data
+  namespace: litellm
+spec:
+  storageClassName: cephrbd-fast-delete
+  volumeMode: Filesystem
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 5Gi
@@ -0,0 +1,15 @@
+model_list: []  # no models are declared in this file
+
+general_settings:
+  use_redis_transaction_buffer: true
+
+litellm_settings:
+  cache: true
+  cache_params:
+    type: redis
+    port: 6379
+    host: redis-service  # name of the Redis Service in the litellm namespace
+
+router_settings:
+  redis_port: 6379
+  redis_host: redis-service
@@ -0,0 +1,34 @@
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for pods labelled app=litellm
+metadata:
+  name: litellm
+  namespace: litellm
+spec:
+  type: ClusterIP
+  selector:
+    app: litellm
+  ports:
+    - name: http
+      protocol: TCP
+      port: 4000
+      targetPort: http  # named container port on the litellm Deployment
+  sessionAffinity: None
+  internalTrafficPolicy: Cluster
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for pods labelled app=redis
+metadata:
+  name: redis-service
+  namespace: litellm
+spec:
+  type: ClusterIP
+  selector:
+    app: redis
+  ports:
+    - name: redis
+      protocol: TCP
+      port: 6379
+      targetPort: redis  # named container port on the redis Deployment
+  sessionAffinity: None
+  internalTrafficPolicy: Cluster
@@ -0,0 +1,18 @@
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultAuth  # Kubernetes-auth settings referenced by the VaultStaticSecrets in this namespace
+metadata:
+  name: default
+  namespace: litellm
+spec:
+  vaultConnectionRef: vso-system/default
+  method: kubernetes
+  mount: k8s/au/syd1
+  kubernetes:
+    role: default
+    serviceAccount: default
+    tokenExpirationSeconds: 600
+    audiences:
+      - vault
+  allowedNamespaces:
+    - litellm
@@ -0,0 +1,34 @@
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultStaticSecret  # syncs a Vault kv-v2 entry into the postgres-credentials Secret
+metadata:
+  name: postgres-credentials
+  namespace: litellm
+spec:
+  vaultAuthRef: default
+  type: kv-v2
+  mount: kv
+  path: kubernetes/namespace/litellm/default/postgres-credentials
+  refreshAfter: 5m
+  hmacSecretData: true
+  destination:
+    name: postgres-credentials
+    create: true
+    overwrite: true
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultStaticSecret  # syncs a Vault kv-v2 entry into the litellm-credentials Secret
+metadata:
+  name: litellm-credentials
+  namespace: litellm
+spec:
+  vaultAuthRef: default
+  type: kv-v2
+  mount: kv
+  path: kubernetes/namespace/litellm/default/litellm-credentials
+  refreshAfter: 5m
+  hmacSecretData: true
+  destination:
+    name: litellm-credentials
+    create: true
+    overwrite: true
@@ -0,0 +1,6 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization  # overlay that includes the shared litellm base with no patches
+
+resources:
+  - ../../../base/litellm
@@ -0,0 +1,31 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: ApplicationSet  # generates one Argo CD Application per matched litellm overlay directory
+metadata:
+  name: aitooling-apps
+  namespace: argocd
+spec:
+  generators:
+    - git:
+        repoURL: https://git.unkin.net/unkin/argocd-apps
+        revision: HEAD
+        directories:
+          - path: apps/overlays/*/litellm
+  template:
+    metadata:
+      name: 'aitooling-{{path[3]}}'  # path[3] = 4th segment ("litellm"); same for every match
+    spec:
+      project: aitooling
+      destination:
+        server: https://kubernetes.default.svc
+        namespace: '{{path[3]}}'
+      source:
+        repoURL: https://git.unkin.net/unkin/argocd-apps
+        targetRevision: HEAD
+        path: '{{path}}'
+      syncPolicy:
+        syncOptions:
+          - ServerSideApply=true
+        automated:
+          prune: true
+          selfHeal: true
@@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization

 resources:
+  - aitooling.yaml
  - observability.yaml
  - platform.yaml
  - storage.yaml
@@ -0,0 +1,19 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: AppProject  # Argo CD project for the AI tooling applications
+metadata:
+  name: aitooling
+  namespace: argocd
+spec:
+  description: AI tooling services
+  sourceRepos:
+    - https://git.unkin.net/unkin/argocd-apps
+  destinations:
+    - server: https://kubernetes.default.svc
+      namespace: litellm
+  namespaceResourceWhitelist:
+    - group: '*'
+      kind: '*'
+  clusterResourceWhitelist:
+    - group: ''  # empty string selects the core API group
+      kind: Namespace
@@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization

 resources:
+  - aitooling.yaml
  - observability.yaml
  - platform.yaml
  - storage.yaml