observability: migrate VictoriaMetrics to operator CRDs + Consul SD (#234)
## Why

The k8s au-syd1 VictoriaMetrics stack ran as two helm charts and only scraped in-cluster targets. The victoria-metrics-operator already runs in vm-system, so this moves the stack onto operator-managed CRDs. That unlocks VMServiceScrape/VMPodScrape (auto-converted from Prometheus ServiceMonitors, used by a follow-up PR) and adds Consul service discovery so the cluster scrapes the **same puppet-prod targets** as the puppet vmagent. Also shrinks vmstorage 3 → 2 (Ceph-backed, replicationFactor 2).

## Changes

- Add **VMCluster `main`**: vmstorage 2 replicas (cephrbd-fast-delete 200Gi, 180d retention, replicationFactor 2), vminsert/vmselect 2 replicas + HPA (2–10, 60% cpu).
- Add **VMAgent `main`**: retains the kubernetes SD jobs (apiservers/nodes/cadvisor), `selectAllByDefault` for VMServiceScrape/VMPodScrape, and a **Consul SD job** against `consul.service.consul` (resolves to the puppet Consul from pods) replicating the puppet vmagent relabels — keep tag `metrics`, `__scheme__` from `metrics_scheme`, `job` from `metrics_job`. TLS is **verified against the reflected `vault-ca-cert`** (no insecure skip-verify).
- Expose vmselect/vminsert/vmagent via **Gateway API** (traefik-internal Gateway + HTTPRoute, http→https redirect), same hostnames.
- Remove the two helm charts, their values files, and vendored charts.

## Notes

- Data wipe on cutover is acceptable (confirmed) — old helm PVCs can be deleted.
- Verify at rollout: pods resolve `*.main.unkin.net` node FQDNs (needed for CA SAN match on scrape targets); `/targets` shows `job=consul`.

Reviewed-on: #234
Co-authored-by: Ben Vincent <ben@unkin.net>
Co-committed-by: Ben Vincent <ben@unkin.net>
This commit was merged in pull request #234.
This commit is contained in:
@@ -0,0 +1,117 @@
|
|||||||
|
---
# Gateway for vmselect on the traefik-internal GatewayClass.
# Exposes vmselect.k8s.syd1.au.unkin.net on an HTTP (80) and an HTTPS (443)
# listener; the HTTPS listener terminates TLS with the vmselect-tls Secret.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: vmselect
  namespace: observability
  labels:
    app.kubernetes.io/instance: victoria-metrics
    app.kubernetes.io/name: vmselect
    traefik.io/instance: internal
  annotations:
    cert-manager.io/cluster-issuer: vault-issuer
    cert-manager.io/common-name: vmselect.k8s.syd1.au.unkin.net
    # Annotation values must be strings, hence the quotes.
    cert-manager.io/private-key-size: "4096"
    external-dns.alpha.kubernetes.io/hostname: vmselect.k8s.syd1.au.unkin.net
    external-dns.alpha.kubernetes.io/target: 198.18.200.4
spec:
  gatewayClassName: traefik-internal
  listeners:
    # Plain-HTTP listener; only routes from this namespace may attach.
    - name: http
      protocol: HTTP
      port: 80
      hostname: vmselect.k8s.syd1.au.unkin.net
      allowedRoutes:
        namespaces:
          from: Same
    # TLS-terminating listener; only routes from this namespace may attach.
    - name: https
      protocol: HTTPS
      port: 443
      hostname: vmselect.k8s.syd1.au.unkin.net
      tls:
        mode: Terminate
        certificateRefs:
          - group: ""
            kind: Secret
            name: vmselect-tls
      allowedRoutes:
        namespaces:
          from: Same
|
||||||
|
---
# Gateway for vminsert on the traefik-internal GatewayClass.
# Exposes vminsert.k8s.syd1.au.unkin.net on an HTTP (80) and an HTTPS (443)
# listener; the HTTPS listener terminates TLS with the vminsert-tls Secret.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: vminsert
  namespace: observability
  labels:
    app.kubernetes.io/instance: victoria-metrics
    app.kubernetes.io/name: vminsert
    traefik.io/instance: internal
  annotations:
    cert-manager.io/cluster-issuer: vault-issuer
    cert-manager.io/common-name: vminsert.k8s.syd1.au.unkin.net
    # Annotation values must be strings, hence the quotes.
    cert-manager.io/private-key-size: "4096"
    external-dns.alpha.kubernetes.io/hostname: vminsert.k8s.syd1.au.unkin.net
    external-dns.alpha.kubernetes.io/target: 198.18.200.4
spec:
  gatewayClassName: traefik-internal
  listeners:
    # Plain-HTTP listener; only routes from this namespace may attach.
    - name: http
      protocol: HTTP
      port: 80
      hostname: vminsert.k8s.syd1.au.unkin.net
      allowedRoutes:
        namespaces:
          from: Same
    # TLS-terminating listener; only routes from this namespace may attach.
    - name: https
      protocol: HTTPS
      port: 443
      hostname: vminsert.k8s.syd1.au.unkin.net
      tls:
        mode: Terminate
        certificateRefs:
          - group: ""
            kind: Secret
            name: vminsert-tls
      allowedRoutes:
        namespaces:
          from: Same
|
||||||
|
---
# Gateway for vmagent on the traefik-internal GatewayClass.
# Exposes vmagent.k8s.syd1.au.unkin.net on an HTTP (80) and an HTTPS (443)
# listener; the HTTPS listener terminates TLS with the vmagent-tls Secret.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: vmagent
  namespace: observability
  labels:
    app.kubernetes.io/instance: victoria-metrics
    app.kubernetes.io/name: vmagent
    traefik.io/instance: internal
  annotations:
    cert-manager.io/cluster-issuer: vault-issuer
    cert-manager.io/common-name: vmagent.k8s.syd1.au.unkin.net
    # Annotation values must be strings, hence the quotes.
    cert-manager.io/private-key-size: "4096"
    external-dns.alpha.kubernetes.io/hostname: vmagent.k8s.syd1.au.unkin.net
    external-dns.alpha.kubernetes.io/target: 198.18.200.4
spec:
  gatewayClassName: traefik-internal
  listeners:
    # Plain-HTTP listener; only routes from this namespace may attach.
    - name: http
      protocol: HTTP
      port: 80
      hostname: vmagent.k8s.syd1.au.unkin.net
      allowedRoutes:
        namespaces:
          from: Same
    # TLS-terminating listener; only routes from this namespace may attach.
    - name: https
      protocol: HTTPS
      port: 443
      hostname: vmagent.k8s.syd1.au.unkin.net
      tls:
        mode: Terminate
        certificateRefs:
          - group: ""
            kind: Secret
            name: vmagent-tls
      allowedRoutes:
        namespaces:
          from: Same
|
||||||
@@ -0,0 +1,165 @@
|
|||||||
|
---
# Redirects every request arriving on the vmselect Gateway's "http" listener
# to the https scheme with a 301.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vmselect-http-redirect
  namespace: observability
  labels:
    app.kubernetes.io/instance: victoria-metrics
    app.kubernetes.io/name: vmselect
spec:
  parentRefs:
    # sectionName pins this route to the plain-HTTP listener only.
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: vmselect
      sectionName: http
  hostnames:
    - vmselect.k8s.syd1.au.unkin.net
  rules:
    # PathPrefix "/" matches every path.
    - matches:
        - path:
            type: PathPrefix
            value: /
      filters:
        - type: RequestRedirect
          requestRedirect:
            scheme: https
            statusCode: 301
|
||||||
|
---
# Routes all HTTPS traffic for vmselect.k8s.syd1.au.unkin.net to the
# vmselect-main Service on port 8481.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vmselect
  namespace: observability
  labels:
    app.kubernetes.io/instance: victoria-metrics
    app.kubernetes.io/name: vmselect
spec:
  parentRefs:
    # sectionName pins this route to the TLS listener only.
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: vmselect
      sectionName: https
  hostnames:
    - vmselect.k8s.syd1.au.unkin.net
  rules:
    # PathPrefix "/" matches every path.
    - matches:
        - path:
            type: PathPrefix
            value: /
      backendRefs:
        - group: ""
          kind: Service
          name: vmselect-main
          port: 8481
          weight: 1
|
||||||
|
---
# Redirects every request arriving on the vminsert Gateway's "http" listener
# to the https scheme with a 301.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vminsert-http-redirect
  namespace: observability
  labels:
    app.kubernetes.io/instance: victoria-metrics
    app.kubernetes.io/name: vminsert
spec:
  parentRefs:
    # sectionName pins this route to the plain-HTTP listener only.
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: vminsert
      sectionName: http
  hostnames:
    - vminsert.k8s.syd1.au.unkin.net
  rules:
    # PathPrefix "/" matches every path.
    - matches:
        - path:
            type: PathPrefix
            value: /
      filters:
        - type: RequestRedirect
          requestRedirect:
            scheme: https
            statusCode: 301
|
||||||
|
---
# Routes all HTTPS traffic for vminsert.k8s.syd1.au.unkin.net to the
# vminsert-main Service on port 8480.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vminsert
  namespace: observability
  labels:
    app.kubernetes.io/instance: victoria-metrics
    app.kubernetes.io/name: vminsert
spec:
  parentRefs:
    # sectionName pins this route to the TLS listener only.
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: vminsert
      sectionName: https
  hostnames:
    - vminsert.k8s.syd1.au.unkin.net
  rules:
    # PathPrefix "/" matches every path.
    - matches:
        - path:
            type: PathPrefix
            value: /
      backendRefs:
        - group: ""
          kind: Service
          name: vminsert-main
          port: 8480
          weight: 1
|
||||||
|
---
# Redirects every request arriving on the vmagent Gateway's "http" listener
# to the https scheme with a 301.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vmagent-http-redirect
  namespace: observability
  labels:
    app.kubernetes.io/instance: victoria-metrics
    app.kubernetes.io/name: vmagent
spec:
  parentRefs:
    # sectionName pins this route to the plain-HTTP listener only.
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: vmagent
      sectionName: http
  hostnames:
    - vmagent.k8s.syd1.au.unkin.net
  rules:
    # PathPrefix "/" matches every path.
    - matches:
        - path:
            type: PathPrefix
            value: /
      filters:
        - type: RequestRedirect
          requestRedirect:
            scheme: https
            statusCode: 301
|
||||||
|
---
# Routes all HTTPS traffic for vmagent.k8s.syd1.au.unkin.net to the
# vmagent-main Service on port 8429.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vmagent
  namespace: observability
  labels:
    app.kubernetes.io/instance: victoria-metrics
    app.kubernetes.io/name: vmagent
spec:
  parentRefs:
    # sectionName pins this route to the TLS listener only.
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: vmagent
      sectionName: https
  hostnames:
    - vmagent.k8s.syd1.au.unkin.net
  rules:
    # PathPrefix "/" matches every path.
    - matches:
        - path:
            type: PathPrefix
            value: /
      backendRefs:
        - group: ""
          kind: Service
          name: vmagent-main
          port: 8429
          weight: 1
|
||||||
@@ -4,3 +4,7 @@ kind: Kustomization
|
|||||||
|
|
||||||
resources:
|
resources:
|
||||||
- namespace.yaml
|
- namespace.yaml
|
||||||
|
- vmcluster.yaml
|
||||||
|
- vmagent.yaml
|
||||||
|
- gateway.yaml
|
||||||
|
- httproute.yaml
|
||||||
|
|||||||
@@ -0,0 +1,122 @@
|
|||||||
|
---
# VMAgent "main": two replicas scraping every 15s and remote-writing to the
# vminsert-main Service. Scrape jobs come from two places:
#   * VMServiceScrape / VMPodScrape / VMNodeScrape objects (selectAllByDefault)
#   * the inline jobs below (self-scrape, kubernetes SD, Consul SD)
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMAgent
metadata:
  name: main
  namespace: observability
spec:
  replicaCount: 2
  scrapeInterval: 15s
  # Also consume VMServiceScrape / VMPodScrape / VMNodeScrape from every namespace
  # (the operator auto-converts Prometheus ServiceMonitors -> VMServiceScrape).
  selectAllByDefault: true
  extraArgs:
    loggerFormat: json
  remoteWrite:
    - url: http://vminsert-main.observability.svc.cluster.local:8480/insert/0/prometheus/
  resources:
    requests:
      cpu: 500m
      memory: 512Mi
    limits:
      cpu: "1"
      memory: 2Gi
  # Reflected Vault intermediate CA (unkin.net) for verifying puppet Consul + metrics targets.
  volumes:
    - name: vault-ca
      secret:
        secretName: vault-ca-cert
  volumeMounts:
    - name: vault-ca
      mountPath: /etc/vmagent-tls
      readOnly: true
  inlineScrapeConfig: |
    - job_name: vmagent
      static_configs:
        - targets: ["localhost:8429"]
    - job_name: "kubernetes-apiservers"
      kubernetes_sd_configs:
        - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
        - source_labels:
            - __meta_kubernetes_namespace
            - __meta_kubernetes_service_name
            - __meta_kubernetes_endpoint_port_name
          action: keep
          regex: default;kubernetes;https
    - job_name: "kubernetes-nodes"
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      kubernetes_sd_configs:
        - role: node
      relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)
    - job_name: "kubernetes-nodes-cadvisor"
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      kubernetes_sd_configs:
        - role: node
      metrics_path: /metrics/cadvisor
      relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)
        - source_labels: [__metrics_path__]
          target_label: metrics_path
      metric_relabel_configs:
        - action: replace
          source_labels: [pod]
          regex: '(.+)'
          target_label: pod_name
          replacement: '${1}'
        - action: replace
          source_labels: [container]
          regex: '(.+)'
          target_label: container_name
          replacement: '${1}'
        - action: replace
          target_label: name
          replacement: k8s_stub
        - action: replace
          source_labels: [id]
          regex: '^/system\.slice/(.+)\.service$'
          target_label: systemd_service_name
          replacement: '${1}'
    # puppet-prod Consul service discovery (same targets as the puppet vmagent).
    # consul.service.consul resolves to the puppet Consul from in-cluster pods.
    - job_name: consul
      consul_sd_configs:
        - server: consul.service.consul:443
          scheme: https
          tls_config:
            ca_file: /etc/vmagent-tls/ca.crt
      relabel_configs:
        # Only keep services that carry the "metrics" tag.
        - source_labels: [__meta_consul_tagpresent_metrics]
          regex: "true"
          action: keep
        # Scrape <consul node>:<service port>. The two source labels are
        # joined with ":" and the default regex/replacement ("(.*)" / "$1")
        # copies the joined value as-is. A "${1}:${2}" replacement must not be
        # used here: the default regex has a single capture group, so "${2}"
        # expands to "" and the address would end with a stray ":".
        - source_labels: [__meta_consul_node, __meta_consul_service_port]
          separator: ":"
          target_label: __address__
          action: replace
        - source_labels: [__meta_consul_tag_metrics_scheme]
          target_label: __scheme__
          action: replace
        - target_label: __metrics_path__
          replacement: /metrics
        - source_labels: [__meta_consul_tag_metrics_job]
          target_label: job
          action: replace
      # CA used to verify the scrape targets themselves.
      tls_config:
        ca_file: /etc/vmagent-tls/ca.crt
|
||||||
@@ -0,0 +1,115 @@
|
|||||||
|
---
# VMCluster "main": 180d retention, replicationFactor 2.
#   vmstorage: 2 replicas, 200Gi cephrbd-fast-delete volume each
#   vmselect / vminsert: 2 replicas each, HPA 2-10 on 60% CPU utilisation
# NOTE(review): replicationFactor 2 with only 2 vmstorage nodes leaves no spare
# node to keep two copies of new data while one node is down; VictoriaMetrics
# docs suggest 2*N-1 nodes for factor N. Confirm this trade-off is intended.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMCluster
metadata:
  name: main
  namespace: observability
spec:
  retentionPeriod: "180d"
  replicationFactor: 2
  vmstorage:
    replicaCount: 2
    extraArgs:
      dedup.minScrapeInterval: 15s
      loggerFormat: json
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: cephrbd-fast-delete
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 200Gi
    resources:
      requests:
        cpu: "1"
        memory: 2Gi
      limits:
        cpu: "2"
        memory: 8Gi
  vmselect:
    replicaCount: 2
    extraArgs:
      dedup.minScrapeInterval: 15s
      loggerFormat: json
    resources:
      requests:
        cpu: 50m
        memory: 128Mi
      limits:
        cpu: 500m
        memory: 1024Mi
    hpa:
      minReplicas: 2
      maxReplicas: 10
      metrics:
        - type: Resource
          resource:
            name: cpu
            target:
              type: Utilization
              averageUtilization: 60
      # The operator's embedded HPA spec names this field "behaviour"
      # (British spelling); "behavior" is not a known field there.
      behaviour:
        # Scale up immediately: the larger of +100% or +4 pods per 30s.
        scaleUp:
          stabilizationWindowSeconds: 0
          selectPolicy: Max
          policies:
            - type: Percent
              value: 100
              periodSeconds: 30
            - type: Pods
              value: 4
              periodSeconds: 30
        # Scale down after a 300s window: the smaller of -10% or -2 pods per 60s.
        scaleDown:
          stabilizationWindowSeconds: 300
          selectPolicy: Min
          policies:
            - type: Percent
              value: 10
              periodSeconds: 60
            - type: Pods
              value: 2
              periodSeconds: 60
  vminsert:
    replicaCount: 2
    extraArgs:
      loggerFormat: json
    resources:
      requests:
        cpu: 50m
        memory: 128Mi
      limits:
        cpu: 500m
        memory: 1024Mi
    hpa:
      minReplicas: 2
      maxReplicas: 10
      metrics:
        - type: Resource
          resource:
            name: cpu
            target:
              type: Utilization
              averageUtilization: 60
      # The operator's embedded HPA spec names this field "behaviour"
      # (British spelling); "behavior" is not a known field there.
      behaviour:
        # Scale up immediately: the larger of +100% or +4 pods per 30s.
        scaleUp:
          stabilizationWindowSeconds: 0
          selectPolicy: Max
          policies:
            - type: Percent
              value: 100
              periodSeconds: 30
            - type: Pods
              value: 4
              periodSeconds: 30
        # Scale down after a 300s window: the smaller of -10% or -2 pods per 60s.
        scaleDown:
          stabilizationWindowSeconds: 300
          selectPolicy: Min
          policies:
            - type: Percent
              value: 10
              periodSeconds: 60
            - type: Pods
              value: 2
              periodSeconds: 60
|
||||||
@@ -6,17 +6,3 @@ namespace: observability
|
|||||||
|
|
||||||
resources:
|
resources:
|
||||||
- ../../../base/observability
|
- ../../../base/observability
|
||||||
|
|
||||||
helmCharts:
|
|
||||||
- name: victoria-metrics-cluster
|
|
||||||
repo: https://artifactapi.k8s.syd1.au.unkin.net/api/v1/virtual/helm
|
|
||||||
version: "0.33.0"
|
|
||||||
releaseName: victoria-metrics-cluster
|
|
||||||
namespace: observability
|
|
||||||
valuesFile: values-vmcluster.yaml
|
|
||||||
- name: victoria-metrics-agent
|
|
||||||
repo: https://artifactapi.k8s.syd1.au.unkin.net/api/v1/virtual/helm
|
|
||||||
version: "0.30.0"
|
|
||||||
releaseName: victoria-metrics-agent
|
|
||||||
namespace: observability
|
|
||||||
valuesFile: values-vmagent.yaml
|
|
||||||
|
|||||||
@@ -1,102 +0,0 @@
|
|||||||
# Helm values for the vmagent release (3 replicas, nginx Ingress, remote-write
# to the victoria-metrics-cluster vminsert Service).
image:
  repository: victoriametrics/vmagent
  pullPolicy: IfNotPresent
global:
  scrape_interval: 15s
podDisruptionBudget:
  enabled: true
  maxUnavailable: 1
podAnnotations:
  prometheus.io/scrape: "true"
  # Must match httpListenAddr below (:8429); 8481 is vmselect's port.
  prometheus.io/port: "8429"
replicaCount: 3
extraArgs:
  envflag.enable: true
  envflag.prefix: VM_
  loggerFormat: json
  # Quoted because the value starts with ":".
  httpListenAddr: ":8429"
service:
  enabled: true
ingress:
  enabled: true
  annotations:
    cert-manager.io/cluster-issuer: vault-issuer
    cert-manager.io/common-name: vmagent.k8s.syd1.au.unkin.net
    cert-manager.io/private-key-size: "4096"
    external-dns.alpha.kubernetes.io/hostname: vmagent.k8s.syd1.au.unkin.net
    external-dns.alpha.kubernetes.io/target: 198.18.200.0
  hosts:
    - name: vmagent.k8s.syd1.au.unkin.net
      path:
        - /
      port: http
  tls:
    - hosts:
        - vmagent.k8s.syd1.au.unkin.net
      secretName: vmagent-tls
  ingressClassName: nginx
remoteWrite:
  - url: http://victoria-metrics-cluster-vminsert.observability.svc.cluster.local:8480/insert/0/prometheus/
scrape_configs:
  # Self-scrape of the agent's own metrics endpoint.
  - job_name: vmagent
    static_configs:
      - targets: ["localhost:8429"]
  - job_name: "kubernetes-apiservers"
    kubernetes_sd_configs:
      - role: endpoints
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      insecure_skip_verify: true
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    relabel_configs:
      # Keep only the default/kubernetes service's https endpoint.
      - source_labels:
          - __meta_kubernetes_namespace
          - __meta_kubernetes_service_name
          - __meta_kubernetes_endpoint_port_name
        action: keep
        regex: default;kubernetes;https
  - job_name: "kubernetes-nodes"
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      insecure_skip_verify: true
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
      - role: node
    relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
  - job_name: "kubernetes-nodes-cadvisor"
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      insecure_skip_verify: true
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
      - role: node
    metrics_path: /metrics/cadvisor
    relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - source_labels: [__metrics_path__]
        target_label: metrics_path
    metric_relabel_configs:
      # Copy pod/container into the *_name labels.
      - action: replace
        source_labels: [pod]
        regex: '(.+)'
        target_label: pod_name
        replacement: '${1}'
      - action: replace
        source_labels: [container]
        regex: '(.+)'
        target_label: container_name
        replacement: '${1}'
      - action: replace
        target_label: name
        replacement: k8s_stub
      - action: replace
        source_labels: [id]
        regex: '^/system\.slice/(.+)\.service$'
        target_label: systemd_service_name
        replacement: '${1}'
|
|
||||||
@@ -1,185 +0,0 @@
|
|||||||
# vmselect: 2 replicas, CPU-based HPA (2-10), nginx Ingress on
# vmselect.k8s.syd1.au.unkin.net.
vmselect:
  enabled: true
  replicaCount: 2
  image:
    repository: victoriametrics/vmselect
    pullPolicy: IfNotPresent
    variant: cluster
  extraArgs:
    envflag.enable: true
    envflag.prefix: VM_
    loggerFormat: json
    httpListenAddr: ":8481"
    dedup.minScrapeInterval: 15s
    replicationFactor: 2
  resources:
    requests:
      cpu: 50m
      memory: 128Mi
    limits:
      cpu: 500m
      memory: 1024Mi
  horizontalPodAutoscaler:
    enabled: true
    minReplicas: 2
    maxReplicas: 10
    metrics:
      - type: Resource
        resource:
          name: cpu
          target:
            type: Utilization
            averageUtilization: 60
    behavior:
      # Scale up immediately: the larger of +100% or +4 pods per 30s.
      scaleUp:
        stabilizationWindowSeconds: 0
        selectPolicy: Max
        policies:
          - type: Percent
            value: 100
            periodSeconds: 30
          - type: Pods
            value: 4
            periodSeconds: 30
      # Scale down after a 300s window: the smaller of -10% or -2 pods per 60s.
      scaleDown:
        stabilizationWindowSeconds: 300
        selectPolicy: Min
        policies:
          - type: Percent
            value: 10
            periodSeconds: 60
          - type: Pods
            value: 2
            periodSeconds: 60
  podAnnotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "8481"
  podDisruptionBudget:
    enabled: true
    maxUnavailable: 1
  ingress:
    enabled: true
    ingressClassName: nginx
    annotations:
      cert-manager.io/cluster-issuer: vault-issuer
      cert-manager.io/common-name: vmselect.k8s.syd1.au.unkin.net
      cert-manager.io/private-key-size: "4096"
      external-dns.alpha.kubernetes.io/hostname: vmselect.k8s.syd1.au.unkin.net
      external-dns.alpha.kubernetes.io/target: 198.18.200.0
    hosts:
      - name: vmselect.k8s.syd1.au.unkin.net
        path:
          - /
        port: http
    tls:
      - hosts:
          - vmselect.k8s.syd1.au.unkin.net
        secretName: vmselect-tls
|
|
||||||
|
|
||||||
# vminsert: 2 replicas, CPU-based HPA (2-10), nginx Ingress on
# vminsert.k8s.syd1.au.unkin.net with a 100m request-body limit.
vminsert:
  enabled: true
  replicaCount: 2
  image:
    repository: victoriametrics/vminsert
    pullPolicy: IfNotPresent
    variant: cluster
  extraArgs:
    envflag.enable: true
    envflag.prefix: VM_
    loggerFormat: json
    httpListenAddr: ":8480"
    replicationFactor: 2
  resources:
    requests:
      cpu: 50m
      memory: 128Mi
    limits:
      cpu: 500m
      memory: 1024Mi
  horizontalPodAutoscaler:
    enabled: true
    minReplicas: 2
    maxReplicas: 10
    metrics:
      - type: Resource
        resource:
          name: cpu
          target:
            type: Utilization
            averageUtilization: 60
    behavior:
      # Scale up immediately: the larger of +100% or +4 pods per 30s.
      scaleUp:
        stabilizationWindowSeconds: 0
        selectPolicy: Max
        policies:
          - type: Percent
            value: 100
            periodSeconds: 30
          - type: Pods
            value: 4
            periodSeconds: 30
      # Scale down after a 300s window: the smaller of -10% or -2 pods per 60s.
      scaleDown:
        stabilizationWindowSeconds: 300
        selectPolicy: Min
        policies:
          - type: Percent
            value: 10
            periodSeconds: 60
          - type: Pods
            value: 2
            periodSeconds: 60
  podAnnotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "8480"
  podDisruptionBudget:
    enabled: true
    maxUnavailable: 1
  ingress:
    enabled: true
    ingressClassName: nginx
    annotations:
      cert-manager.io/cluster-issuer: vault-issuer
      cert-manager.io/common-name: vminsert.k8s.syd1.au.unkin.net
      cert-manager.io/private-key-size: "4096"
      external-dns.alpha.kubernetes.io/hostname: vminsert.k8s.syd1.au.unkin.net
      external-dns.alpha.kubernetes.io/target: 198.18.200.0
      nginx.ingress.kubernetes.io/proxy-body-size: "100m"
    hosts:
      - name: vminsert.k8s.syd1.au.unkin.net
        path:
          - /
        port: http
    tls:
      - hosts:
          - vminsert.k8s.syd1.au.unkin.net
        secretName: vminsert-tls
|
|
||||||
|
|
||||||
# vmstorage: 3 replicas started in order, 180d retention, one 200Gi
# cephrbd-fast-delete volume per replica mounted at /storage.
vmstorage:
  enabled: true
  replicaCount: 3
  podManagementPolicy: OrderedReady
  image:
    repository: victoriametrics/vmstorage
    pullPolicy: IfNotPresent
    variant: cluster
  retentionPeriod: 180d
  extraArgs:
    envflag.enable: true
    envflag.prefix: VM_
    loggerFormat: json
    httpListenAddr: ":8482"
    dedup.minScrapeInterval: 15s
  podAnnotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "8482"
  podDisruptionBudget:
    enabled: true
    maxUnavailable: 1
  persistentVolume:
    enabled: true
    name: vmstorage-volume
    storageClassName: cephrbd-fast-delete
    accessModes:
      - ReadWriteOnce
    mountPath: /storage
    size: 200Gi
|
|
||||||
Reference in New Issue
Block a user