41ab3ff614
The k8s au-syd1 VictoriaMetrics stack ran as two helm charts (victoria-metrics-cluster + victoria-metrics-agent) and only scraped in-cluster targets. The victoria-metrics-operator already runs in vm-system, so move the stack onto operator-managed CRDs. This lets the VMAgent consume VMServiceScrape/VMPodScrape (auto-converted from Prometheus ServiceMonitors) and adds Consul service discovery so the cluster scrapes the same puppet-prod targets as the puppet vmagent. Changes: - Add VMCluster `main`: vmstorage 2 replicas (down from 3, replicationFactor 2, cephrbd-fast-delete 200Gi, 180d retention), vminsert/vmselect 2 replicas + HPA (2-10, 60% cpu). - Add VMAgent `main`: keeps the kubernetes SD jobs (apiservers/nodes/cadvisor), selectAllByDefault for VMServiceScrape/VMPodScrape, and a Consul SD job against consul.service.consul (puppet Consul) replicating the puppet vmagent relabels (keep tag `metrics`, scheme from `metrics_scheme`, job from `metrics_job`). TLS verified against the reflected vault-ca-cert (no insecure skip-verify). - Expose vmselect/vminsert/vmagent via Gateway API (traefik-internal Gateway + HTTPRoute, http->https redirect), same hostnames as before. - Remove the two helm charts, their values files, and vendored charts.
123 lines
4.0 KiB
YAML
123 lines
4.0 KiB
YAML
---
# VMAgent `main`: operator-managed vmagent for the `observability` namespace.
# It scrapes the inline jobs below plus every VMServiceScrape / VMPodScrape /
# VMNodeScrape object in the cluster, and remote-writes to vminsert-main.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMAgent
metadata:
  name: main
  namespace: observability
spec:
  # NOTE(review): both replicas scrape the same targets; this presumably
  # relies on deduplication being enabled on the storage side - confirm.
  replicaCount: 2
  scrapeInterval: 15s
  # Also consume VMServiceScrape / VMPodScrape / VMNodeScrape from every namespace
  # (the operator auto-converts Prometheus ServiceMonitors -> VMServiceScrape).
  selectAllByDefault: true
  extraArgs:
    loggerFormat: json
  remoteWrite:
    - url: http://vminsert-main.observability.svc.cluster.local:8480/insert/0/prometheus/
  resources:
    requests:
      cpu: 500m
      memory: 512Mi
    limits:
      cpu: "1"
      memory: 2Gi
  # Reflected Vault intermediate CA (unkin.net) for verifying puppet Consul + metrics targets.
  # NOTE(review): the scrape config below reads /etc/vmagent-tls/ca.crt, so the
  # secret is assumed to expose the CA under the key `ca.crt` - confirm.
  volumes:
    - name: vault-ca
      secret:
        secretName: vault-ca-cert
  volumeMounts:
    - name: vault-ca
      mountPath: /etc/vmagent-tls
      readOnly: true
  # Raw scrape jobs appended to the operator-generated configuration.
  # NOTE(review): the three kubernetes-* jobs set `insecure_skip_verify: true`,
  # which disables certificate verification even though `ca_file` is provided.
  # Only the consul job actually verifies TLS.
  inlineScrapeConfig: |
    - job_name: vmagent
      static_configs:
        - targets: ["localhost:8429"]
    - job_name: "kubernetes-apiservers"
      kubernetes_sd_configs:
        - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
        - source_labels:
            - __meta_kubernetes_namespace
            - __meta_kubernetes_service_name
            - __meta_kubernetes_endpoint_port_name
          action: keep
          regex: default;kubernetes;https
    - job_name: "kubernetes-nodes"
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      kubernetes_sd_configs:
        - role: node
      relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)
    - job_name: "kubernetes-nodes-cadvisor"
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      kubernetes_sd_configs:
        - role: node
      metrics_path: /metrics/cadvisor
      relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)
        - source_labels: [__metrics_path__]
          target_label: metrics_path
      metric_relabel_configs:
        - action: replace
          source_labels: [pod]
          regex: '(.+)'
          target_label: pod_name
          replacement: '${1}'
        - action: replace
          source_labels: [container]
          regex: '(.+)'
          target_label: container_name
          replacement: '${1}'
        - action: replace
          target_label: name
          replacement: k8s_stub
        - action: replace
          source_labels: [id]
          regex: '^/system\.slice/(.+)\.service$'
          target_label: systemd_service_name
          replacement: '${1}'
    # puppet-prod Consul service discovery (same targets as the puppet vmagent).
    # consul.service.consul resolves to the puppet Consul from in-cluster pods.
    - job_name: consul
      consul_sd_configs:
        - server: consul.service.consul:443
          scheme: https
          tls_config:
            ca_file: /etc/vmagent-tls/ca.crt
      relabel_configs:
        # Only scrape services that carry the `metrics` tag.
        - source_labels: [__meta_consul_tagpresent_metrics]
          regex: "true"
          action: keep
        # Scrape <consul node name>:<service port>. The explicit two-group
        # regex is required: with the default regex `(.*)` there is no second
        # capture group, so "${1}:${2}" would yield "node:port:" (trailing
        # colon). When node or port is empty the rule does not match and the
        # discovered __address__ is kept.
        - source_labels: [__meta_consul_node, __meta_consul_service_port]
          separator: ":"
          regex: "(.+):(.+)"
          target_label: __address__
          replacement: "${1}:${2}"
          action: replace
        # Scheme and job name come from the `metrics_scheme` / `metrics_job`
        # Consul tags.
        - source_labels: [__meta_consul_tag_metrics_scheme]
          target_label: __scheme__
          action: replace
        - target_label: __metrics_path__
          replacement: /metrics
        - source_labels: [__meta_consul_tag_metrics_job]
          target_label: job
          action: replace
      # CA used to verify the scraped targets themselves.
      tls_config:
        ca_file: /etc/vmagent-tls/ca.crt