feat: migrate observability to ArgoCD (#82)

Migrate Victoria Metrics cluster and agent from Terragrunt to ArgoCD/Kustomize.
Creates new observability AppProject and ApplicationSet.
Deploys victoria-metrics-cluster v0.33.0 (vmselect/vminsert/vmstorage with
HPA, PDB, ingress) and victoria-metrics-agent v0.30.0 (3 replicas, k8s scrape
configs) in the observability namespace.

💘 Generated with Crush

Assisted-by: Claude Sonnet 4.6 via Crush <crush@charm.land>

Reviewed-on: #82
This commit was merged in pull request #82.
This commit is contained in:
2026-04-07 19:15:45 +10:00
parent c3a145acbf
commit b100f3034e
9 changed files with 379 additions and 0 deletions
@@ -0,0 +1,6 @@
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
+7
View File
@@ -0,0 +1,7 @@
---
apiVersion: v1
kind: Namespace
metadata:
labels:
app.kubernetes.io/name: observability
name: observability
@@ -0,0 +1,22 @@
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: observability
resources:
- ../../../base/observability
helmCharts:
- name: victoria-metrics-cluster
repo: https://victoriametrics.github.io/helm-charts/
version: "0.33.0"
releaseName: victoria-metrics-cluster
namespace: observability
valuesFile: values-vmcluster.yaml
- name: victoria-metrics-agent
repo: https://victoriametrics.github.io/helm-charts/
version: "0.30.0"
releaseName: victoria-metrics-agent
namespace: observability
valuesFile: values-vmagent.yaml
@@ -0,0 +1,102 @@
image:
repository: victoriametrics/vmagent
pullPolicy: IfNotPresent
global:
scrape_interval: 15s
podDisruptionBudget:
enabled: true
maxUnavailable: 1
podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8481"
replicaCount: 3
extraArgs:
envflag.enable: true
envflag.prefix: VM_
loggerFormat: json
httpListenAddr: :8429
service:
enabled: true
ingress:
enabled: true
annotations:
cert-manager.io/cluster-issuer: vault-issuer
cert-manager.io/common-name: vmagent.k8s.syd1.au.unkin.net
cert-manager.io/private-key-size: "4096"
external-dns.alpha.kubernetes.io/hostname: vmagent.k8s.syd1.au.unkin.net
external-dns.alpha.kubernetes.io/target: 198.18.200.0
hosts:
- name: vmagent.k8s.syd1.au.unkin.net
path:
- /
port: http
tls:
- hosts:
- vmagent.k8s.syd1.au.unkin.net
secretName: vmagent-tls
ingressClassName: nginx
remoteWrite:
- url: http://victoria-metrics-cluster-vminsert.observability.svc.cluster.local:8480/insert/0/prometheus/
scrape_configs:
- job_name: vmagent
static_configs:
- targets: ["localhost:8429"]
- job_name: "kubernetes-apiservers"
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels:
- __meta_kubernetes_namespace
- __meta_kubernetes_service_name
- __meta_kubernetes_endpoint_port_name
action: keep
regex: default;kubernetes;https
- job_name: "kubernetes-nodes"
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- job_name: "kubernetes-nodes-cadvisor"
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__metrics_path__]
target_label: metrics_path
metric_relabel_configs:
- action: replace
source_labels: [pod]
regex: '(.+)'
target_label: pod_name
replacement: '${1}'
- action: replace
source_labels: [container]
regex: '(.+)'
target_label: container_name
replacement: '${1}'
- action: replace
target_label: name
replacement: k8s_stub
- action: replace
source_labels: [id]
regex: '^/system\.slice/(.+)\.service$'
target_label: systemd_service_name
replacement: '${1}'
@@ -0,0 +1,185 @@
vmselect:
enabled: true
image:
repository: victoriametrics/vmselect
pullPolicy: IfNotPresent
variant: cluster
extraArgs:
envflag.enable: true
envflag.prefix: VM_
loggerFormat: json
httpListenAddr: :8481
dedup.minScrapeInterval: 15s
replicationFactor: 2
resources:
limits:
cpu: 500m
memory: 1024Mi
requests:
cpu: 50m
memory: 128Mi
horizontalPodAutoscaler:
enabled: true
maxReplicas: 10
minReplicas: 2
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 60
behavior:
scaleUp:
stabilizationWindowSeconds: 0
selectPolicy: Max
policies:
- type: Percent
value: 100
periodSeconds: 30
- type: Pods
value: 4
periodSeconds: 30
scaleDown:
stabilizationWindowSeconds: 300
selectPolicy: Min
policies:
- type: Percent
value: 10
periodSeconds: 60
- type: Pods
value: 2
periodSeconds: 60
podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8481"
podDisruptionBudget:
enabled: true
maxUnavailable: 1
replicaCount: 2
ingress:
enabled: true
annotations:
cert-manager.io/cluster-issuer: vault-issuer
cert-manager.io/common-name: vmselect.k8s.syd1.au.unkin.net
cert-manager.io/private-key-size: "4096"
external-dns.alpha.kubernetes.io/hostname: vmselect.k8s.syd1.au.unkin.net
external-dns.alpha.kubernetes.io/target: 198.18.200.0
hosts:
- name: vmselect.k8s.syd1.au.unkin.net
path:
- /
port: http
tls:
- hosts:
- vmselect.k8s.syd1.au.unkin.net
secretName: vmselect-tls
ingressClassName: nginx
vminsert:
enabled: true
image:
repository: victoriametrics/vminsert
variant: cluster
pullPolicy: IfNotPresent
extraArgs:
envflag.enable: true
envflag.prefix: VM_
loggerFormat: json
httpListenAddr: :8480
replicationFactor: 2
resources:
limits:
cpu: 500m
memory: 1024Mi
requests:
cpu: 50m
memory: 128Mi
horizontalPodAutoscaler:
enabled: true
maxReplicas: 10
minReplicas: 2
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 60
behavior:
scaleUp:
stabilizationWindowSeconds: 0
selectPolicy: Max
policies:
- type: Percent
value: 100
periodSeconds: 30
- type: Pods
value: 4
periodSeconds: 30
scaleDown:
stabilizationWindowSeconds: 300
selectPolicy: Min
policies:
- type: Percent
value: 10
periodSeconds: 60
- type: Pods
value: 2
periodSeconds: 60
podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8480"
podDisruptionBudget:
enabled: true
maxUnavailable: 1
replicaCount: 2
ingress:
enabled: true
annotations:
cert-manager.io/cluster-issuer: vault-issuer
cert-manager.io/common-name: vminsert.k8s.syd1.au.unkin.net
cert-manager.io/private-key-size: "4096"
external-dns.alpha.kubernetes.io/hostname: vminsert.k8s.syd1.au.unkin.net
external-dns.alpha.kubernetes.io/target: 198.18.200.0
nginx.ingress.kubernetes.io/proxy-body-size: "100m"
hosts:
- name: vminsert.k8s.syd1.au.unkin.net
path:
- /
port: http
tls:
- hosts:
- vminsert.k8s.syd1.au.unkin.net
secretName: vminsert-tls
ingressClassName: nginx
vmstorage:
enabled: true
image:
repository: victoriametrics/vmstorage
variant: cluster
pullPolicy: IfNotPresent
retentionPeriod: 180d
extraArgs:
envflag.enable: true
envflag.prefix: VM_
loggerFormat: json
httpListenAddr: :8482
dedup.minScrapeInterval: 15s
podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8482"
podDisruptionBudget:
enabled: true
maxUnavailable: 1
persistentVolume:
enabled: true
name: vmstorage-volume
accessModes:
- ReadWriteOnce
storageClassName: cephrbd-fast-delete
mountPath: /storage
size: 200Gi
replicaCount: 3
podManagementPolicy: OrderedReady
@@ -3,5 +3,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- observability.yaml
- platform.yaml
- storage.yaml
+31
View File
@@ -0,0 +1,31 @@
---
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
name: observability-apps
namespace: argocd
spec:
generators:
- git:
repoURL: https://git.unkin.net/unkin/argocd-apps
revision: HEAD
directories:
- path: apps/overlays/*/observability
template:
metadata:
name: 'observability-{{path[3]}}'
spec:
project: observability
source:
repoURL: https://git.unkin.net/unkin/argocd-apps
targetRevision: HEAD
path: '{{path}}'
destination:
server: https://kubernetes.default.svc
namespace: '{{path[3]}}'
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- ServerSideApply=true
+1
View File
@@ -3,5 +3,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- observability.yaml
- platform.yaml
- storage.yaml
+24
View File
@@ -0,0 +1,24 @@
---
apiVersion: argoproj.io/v1alpha1
kind: AppProject
metadata:
name: observability
namespace: argocd
spec:
description: Observability stack (metrics, monitoring)
sourceRepos:
- https://git.unkin.net/unkin/argocd-apps
- https://victoriametrics.github.io/helm-charts/
destinations:
- namespace: 'observability'
server: https://kubernetes.default.svc
clusterResourceWhitelist:
- group: ''
kind: Namespace
- group: 'rbac.authorization.k8s.io'
kind: ClusterRole
- group: 'rbac.authorization.k8s.io'
kind: ClusterRoleBinding
namespaceResourceWhitelist:
- group: '*'
kind: '*'