diff --git a/apps/base/litellm/cnpg_cluster.yaml b/apps/base/litellm/cnpg_cluster.yaml
new file mode 100644
index 0000000..ea95389
--- /dev/null
+++ b/apps/base/litellm/cnpg_cluster.yaml
@@ -0,0 +1,91 @@
+---
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: litellm-postgres
+  namespace: litellm
+spec:
+  affinity:
+    podAntiAffinityType: preferred
+  bootstrap:
+    initdb:
+      database: litellm
+      encoding: UTF8
+      localeCType: C
+      localeCollate: C
+      owner: litellm
+      secret:
+        name: postgres-credentials
+  enablePDB: true
+  enableSuperuserAccess: false
+  failoverDelay: 0
+  imageName: ghcr.io/cloudnative-pg/postgresql:17-minimal-trixie
+  instances: 3
+  logLevel: info
+  maxSyncReplicas: 0
+  minSyncReplicas: 0
+  monitoring:
+    customQueriesConfigMap:
+      - key: queries
+        name: cnpg-default-monitoring
+    disableDefaultQueries: false
+    enablePodMonitor: false
+  postgresql:
+    parameters:
+      archive_mode: "on"
+      archive_timeout: 5min
+      dynamic_shared_memory_type: posix
+      effective_cache_size: 256MB
+      full_page_writes: "on"
+      log_destination: csvlog
+      log_directory: /controller/log
+      log_filename: postgres
+      log_rotation_age: "0"
+      log_rotation_size: "0"
+      log_truncate_on_rotation: "false"
+      logging_collector: "on"
+      max_connections: "200"
+      max_parallel_workers: "16"
+      max_replication_slots: "16"
+      max_worker_processes: "16"
+      shared_buffers: 128MB
+      shared_memory_type: mmap
+      ssl_max_protocol_version: TLSv1.3
+      ssl_min_protocol_version: TLSv1.3
+      wal_keep_size: 256MB
+      wal_level: logical
+      wal_log_hints: "on"
+      wal_receiver_timeout: 5s
+      wal_sender_timeout: 5s
+    syncReplicaElectionConstraint:
+      enabled: false
+  primaryUpdateMethod: restart
+  primaryUpdateStrategy: unsupervised
+  probes:
+    liveness:
+      isolationCheck:
+        connectionTimeout: 1000
+        enabled: true
+        requestTimeout: 1000
+  replicationSlots:
+    highAvailability:
+      enabled: true
+      slotPrefix: _cnpg_
+    synchronizeReplicas:
+      enabled: true
+    updateInterval: 30
+  resources:
+    limits:
+      cpu: 500m
+      memory: 512Mi
+    requests:
+      cpu: 250m
+      memory: 256Mi
+  smartShutdownTimeout: 180
+  startDelay: 3600
+  stopDelay: 1800
+  storage:
+    resizeInUseVolumes: true
+    size: 10Gi
+    storageClass: cephrbd-fast-delete
+  switchoverDelay: 3600
diff --git a/apps/base/litellm/cnpg_pooler.yaml b/apps/base/litellm/cnpg_pooler.yaml
new file mode 100644
--- /dev/null
+++ b/apps/base/litellm/cnpg_pooler.yaml
@@ -0,0 +1,36 @@
+---
+apiVersion: postgresql.cnpg.io/v1
+kind: Pooler
+metadata:
+  name: litellm-postgres-pooler
+  namespace: litellm
+spec:
+  cluster:
+    name: litellm-postgres
+  instances: 2
+  pgbouncer:
+    parameters:
+      # NOTE(review): with 2 Pooler instances, a pool size of 100 each can
+      # reach the Cluster's max_connections of "200" -- confirm there is
+      # headroom for reserved and operator connections.
+      default_pool_size: "100"
+      max_client_conn: "400"
+    paused: false
+    poolMode: session
+  template:
+    metadata:
+      labels:
+        app: pooler
+    spec:
+      affinity:
+        podAntiAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                  - key: app
+                    operator: In
+                    values:
+                      - pooler
+              topologyKey: kubernetes.io/hostname
+      containers: []
+  type: rw
diff --git a/apps/base/litellm/deployment.yaml b/apps/base/litellm/deployment.yaml
new file mode 100644
--- /dev/null
+++ b/apps/base/litellm/deployment.yaml
@@ -0,0 +1,72 @@
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: litellm
+  namespace: litellm
+spec:
+  # replicas is left unset: the litellm-hpa HorizontalPodAutoscaler
+  # (hpa.yaml) owns the replica count for this Deployment.
+  selector:
+    matchLabels:
+      app: litellm
+  template:
+    metadata:
+      labels:
+        app: litellm
+    spec:
+      containers:
+        - name: litellm
+          image: docker.litellm.ai/berriai/litellm-database:main-stable
+          imagePullPolicy: Always
+          args:
+            - --config
+            - /app/config.yaml
+            - --port
+            - "4000"
+            - --num_workers
+            - "8"
+          ports:
+            - containerPort: 4000
+              name: http
+              protocol: TCP
+          envFrom:
+            - secretRef:
+                name: litellm-credentials
+          livenessProbe:
+            httpGet:
+              path: /health/liveliness
+              port: 4000
+            failureThreshold: 3
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            successThreshold: 1
+            timeoutSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: /health/readiness
+              port: 4000
+            failureThreshold: 3
+            initialDelaySeconds: 10
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 5
+          resources:
+            limits:
+              cpu: "1"
+              memory: 2Gi
+            requests:
+              cpu: 250m
+              memory: 512Mi
+          volumeMounts:
+            # subPath mounts never pick up ConfigMap updates in place; config
+            # changes roll out because kustomization.yaml generates
+            # litellm-config with a content-hash name suffix.
+            - mountPath: /app/config.yaml
+              name: config
+              subPath: config.yaml
+      restartPolicy: Always
+      volumes:
+        - name: config
+          configMap:
+            name: litellm-config
diff --git a/apps/base/litellm/hpa.yaml b/apps/base/litellm/hpa.yaml
new file mode 100644
index 0000000..b954aa8
--- /dev/null
+++ b/apps/base/litellm/hpa.yaml
@@ -0,0 +1,41 @@
+---
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: litellm-hpa
+  namespace: litellm
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: litellm
+  minReplicas: 2
+  maxReplicas: 10
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: 60
+  behavior:
+    scaleUp:
+      stabilizationWindowSeconds: 0
+      selectPolicy: Max
+      policies:
+        - type: Percent
+          value: 100
+          periodSeconds: 30
+        - type: Pods
+          value: 4
+          periodSeconds: 30
+    scaleDown:
+      stabilizationWindowSeconds: 300
+      selectPolicy: Min
+      policies:
+        - type: Percent
+          value: 10
+          periodSeconds: 60
+        - type: Pods
+          value: 2
+          periodSeconds: 60
diff --git a/apps/base/litellm/ingress.yaml b/apps/base/litellm/ingress.yaml
new file mode 100644
index 0000000..ad8c713
--- /dev/null
+++ b/apps/base/litellm/ingress.yaml
@@ -0,0 +1,29 @@
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  annotations:
+    kubernetes.io/ingress.class: nginx
+    external-dns.alpha.kubernetes.io/hostname: litellm.k8s.syd1.au.unkin.net
+    external-dns.alpha.kubernetes.io/target: 198.18.200.0
+    cert-manager.io/cluster-issuer: vault-issuer
+    cert-manager.io/common-name: litellm.k8s.syd1.au.unkin.net
+    cert-manager.io/private-key-size: "4096"
+  name: litellm
+  namespace: litellm
+spec:
+  rules:
+    - host: litellm.k8s.syd1.au.unkin.net
+      http:
+        paths:
+          - backend:
+              service:
+                name: litellm
+                port:
+                  number: 4000
+            path: /
+            pathType: Prefix
+  tls:
+    - hosts:
+        - litellm.k8s.syd1.au.unkin.net
+      secretName: litellm-tls
diff --git a/apps/base/litellm/kustomization.yaml b/apps/base/litellm/kustomization.yaml
new file mode 100644
--- /dev/null
+++ b/apps/base/litellm/kustomization.yaml
@@ -0,0 +1,27 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - cnpg_cluster.yaml
+  - cnpg_pooler.yaml
+  - deployment.yaml
+  - hpa.yaml
+  - ingress.yaml
+  - namespace.yaml
+  - redis-deployment.yaml
+  - redis-pvc.yaml
+  - services.yaml
+  - vaultauth.yaml
+  - vaultstaticsecret.yaml
+
+configMapGenerator:
+  # The content-hash name suffix is left enabled so that editing
+  # resources/config.yaml renames the ConfigMap and rolls the litellm
+  # Deployment; it mounts the file via subPath, which is never refreshed
+  # in place. The namespace matches the Deployment's so kustomize can
+  # rewrite the configMap reference to the suffixed name.
+  - name: litellm-config
+    namespace: litellm
+    files:
+      - config.yaml=resources/config.yaml
diff --git a/apps/base/litellm/namespace.yaml b/apps/base/litellm/namespace.yaml
new file mode 100644
index 0000000..1daa5a5
--- /dev/null
+++ b/apps/base/litellm/namespace.yaml
@@ -0,0 +1,5 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: litellm
diff --git a/apps/base/litellm/redis-deployment.yaml b/apps/base/litellm/redis-deployment.yaml
new file mode 100644
index 0000000..cf97be0
--- /dev/null
+++ b/apps/base/litellm/redis-deployment.yaml
@@ -0,0 +1,67 @@
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: redis
+  namespace: litellm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: redis
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app: redis
+    spec:
+      containers:
+        - name: redis
+          image: redis:7-alpine
+          imagePullPolicy: IfNotPresent
+          command:
+            - redis-server
+            - --save
+            - "20"
+            - "1"
+          ports:
+            - containerPort: 6379
+              name: redis
+              protocol: TCP
+          livenessProbe:
+            exec:
+              command:
+                - redis-cli
+                - ping
+            failureThreshold: 3
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            successThreshold: 1
+            timeoutSeconds: 5
+          readinessProbe:
+            exec:
+              command:
+                - redis-cli
+                - ping
+            failureThreshold: 3
+            initialDelaySeconds: 5
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 5
+          resources:
+            limits:
+              cpu: 500m
+              memory: 512Mi
+            requests:
+              cpu: 50m
+              memory: 128Mi
+          volumeMounts:
+            - mountPath: /data
+              mountPropagation: None
+              name: data
+      restartPolicy: Always
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: litellm-redis-data
diff --git a/apps/base/litellm/redis-pvc.yaml b/apps/base/litellm/redis-pvc.yaml
new file mode 100644
index 0000000..34dd0a1
--- /dev/null
+++ b/apps/base/litellm/redis-pvc.yaml
@@ -0,0 +1,14 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: litellm-redis-data
+  namespace: litellm
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 5Gi
+  storageClassName: cephrbd-fast-delete
+  volumeMode: Filesystem
diff --git a/apps/base/litellm/resources/config.yaml b/apps/base/litellm/resources/config.yaml
new file mode 100644
index 0000000..f21f47a
--- /dev/null
+++ b/apps/base/litellm/resources/config.yaml
@@ -0,0 +1,15 @@
+model_list: []
+
+router_settings:
+  redis_host: redis-service
+  redis_port: 6379
+
+general_settings:
+  use_redis_transaction_buffer: true
+
+litellm_settings:
+  cache: true
+  cache_params:
+    type: redis
+    host: redis-service
+    port: 6379
diff --git a/apps/base/litellm/services.yaml b/apps/base/litellm/services.yaml
new file mode 100644
index 0000000..05d1218
--- /dev/null
+++ b/apps/base/litellm/services.yaml
@@ -0,0 +1,34 @@
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: litellm
+  namespace: litellm
+spec:
+  internalTrafficPolicy: Cluster
+  ports:
+    - name: http
+      port: 4000
+      protocol: TCP
+      targetPort: http
+  selector:
+    app: litellm
+  sessionAffinity: None
+  type: ClusterIP
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: redis-service
+  namespace: litellm
+spec:
+  internalTrafficPolicy: Cluster
+  ports:
+    - name: redis
+      port: 6379
+      protocol: TCP
+      targetPort: redis
+  selector:
+    app: redis
+  sessionAffinity: None
+  type: ClusterIP
diff --git a/apps/base/litellm/vaultauth.yaml b/apps/base/litellm/vaultauth.yaml
new file mode 100644
index 0000000..a75a0d6
--- /dev/null
+++ b/apps/base/litellm/vaultauth.yaml
@@ -0,0 +1,18 @@
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultAuth
+metadata:
+  name: default
+  namespace: litellm
+spec:
+  allowedNamespaces:
+    - litellm
+  kubernetes:
+    audiences:
+      - vault
+    role: default
+    serviceAccount: default
+    tokenExpirationSeconds: 600
+  method: kubernetes
+  mount: k8s/au/syd1
+  vaultConnectionRef: vso-system/default
diff --git a/apps/base/litellm/vaultstaticsecret.yaml b/apps/base/litellm/vaultstaticsecret.yaml
new file mode 100644
index 0000000..be33d93
--- /dev/null
+++ b/apps/base/litellm/vaultstaticsecret.yaml
@@ -0,0 +1,34 @@
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultStaticSecret
+metadata:
+  name: postgres-credentials
+  namespace: litellm
+spec:
+  destination:
+    create: true
+    name: postgres-credentials
+    overwrite: true
+  hmacSecretData: true
+  mount: kv
+  path: kubernetes/namespace/litellm/default/postgres-credentials
+  refreshAfter: 5m
+  type: kv-v2
+  vaultAuthRef: default
+---
+apiVersion: secrets.hashicorp.com/v1beta1
+kind: VaultStaticSecret
+metadata:
+  name: litellm-credentials
+  namespace: litellm
+spec:
+  destination:
+    create: true
+    name: litellm-credentials
+    overwrite: true
+  hmacSecretData: true
+  mount: kv
+  path: kubernetes/namespace/litellm/default/litellm-credentials
+  refreshAfter: 5m
+  type: kv-v2
+  vaultAuthRef: default
diff --git a/apps/overlays/au-syd1/litellm/kustomization.yaml b/apps/overlays/au-syd1/litellm/kustomization.yaml
new file mode 100644
index 0000000..35c3bbb
--- /dev/null
+++ b/apps/overlays/au-syd1/litellm/kustomization.yaml
@@ -0,0 +1,6 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../../../base/litellm
diff --git a/argocd/applicationsets/aitooling.yaml b/argocd/applicationsets/aitooling.yaml
new file mode 100644
--- /dev/null
+++ b/argocd/applicationsets/aitooling.yaml
@@ -0,0 +1,35 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: ApplicationSet
+metadata:
+  name: aitooling-apps
+  namespace: argocd
+spec:
+  generators:
+    - git:
+        repoURL: https://git.unkin.net/unkin/argocd-apps
+        revision: HEAD
+        directories:
+          - path: apps/overlays/*/litellm
+  template:
+    metadata:
+      # path[3] is the fourth segment of the matched directory
+      # (apps/overlays/<site>/litellm), i.e. the app name, not the site.
+      # NOTE(review): a second site overlay would render the same
+      # Application name -- confirm this is intended.
+      name: 'aitooling-{{path[3]}}'
+    spec:
+      project: aitooling
+      source:
+        repoURL: https://git.unkin.net/unkin/argocd-apps
+        targetRevision: HEAD
+        path: '{{path}}'
+      destination:
+        server: https://kubernetes.default.svc
+        namespace: '{{path[3]}}'
+      syncPolicy:
+        automated:
+          prune: true
+          selfHeal: true
+        syncOptions:
+          - ServerSideApply=true
diff --git a/argocd/applicationsets/kustomization.yaml b/argocd/applicationsets/kustomization.yaml
index b8329a8..87d07c2 100644
--- a/argocd/applicationsets/kustomization.yaml
+++ b/argocd/applicationsets/kustomization.yaml
@@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 
 resources:
+  - aitooling.yaml
   - observability.yaml
   - platform.yaml
   - storage.yaml
diff --git a/argocd/projects/aitooling.yaml b/argocd/projects/aitooling.yaml
new file mode 100644
index 0000000..adc5f6c
--- /dev/null
+++ b/argocd/projects/aitooling.yaml
@@ -0,0 +1,19 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: AppProject
+metadata:
+  name: aitooling
+  namespace: argocd
+spec:
+  description: AI tooling services
+  sourceRepos:
+    - https://git.unkin.net/unkin/argocd-apps
+  destinations:
+    - namespace: 'litellm'
+      server: https://kubernetes.default.svc
+  clusterResourceWhitelist:
+    - group: ''
+      kind: Namespace
+  namespaceResourceWhitelist:
+    - group: '*'
+      kind: '*'
diff --git a/argocd/projects/kustomization.yaml b/argocd/projects/kustomization.yaml
index b8329a8..87d07c2 100644
--- a/argocd/projects/kustomization.yaml
+++ b/argocd/projects/kustomization.yaml
@@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 
 resources:
+  - aitooling.yaml
   - observability.yaml
   - platform.yaml
   - storage.yaml