diff --git a/apps/base/observability/kustomization.yaml b/apps/base/observability/kustomization.yaml
new file mode 100644
index 0000000..5122f9e
--- /dev/null
+++ b/apps/base/observability/kustomization.yaml
@@ -0,0 +1,6 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - namespace.yaml
diff --git a/apps/base/observability/namespace.yaml b/apps/base/observability/namespace.yaml
new file mode 100644
index 0000000..f6e60db
--- /dev/null
+++ b/apps/base/observability/namespace.yaml
@@ -0,0 +1,7 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  labels:
+    app.kubernetes.io/name: observability
+  name: observability
diff --git a/apps/overlays/au-syd1/observability/kustomization.yaml b/apps/overlays/au-syd1/observability/kustomization.yaml
new file mode 100644
index 0000000..0fa7f42
--- /dev/null
+++ b/apps/overlays/au-syd1/observability/kustomization.yaml
@@ -0,0 +1,24 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+namespace: observability
+
+resources:
+  - ../../../base/observability
+
+# helmCharts is only rendered when kustomize runs with --enable-helm
+# (for Argo CD, set kustomize.buildOptions accordingly).
+helmCharts:
+  - name: victoria-metrics-cluster
+    repo: https://victoriametrics.github.io/helm-charts/
+    version: "0.33.0"
+    releaseName: victoria-metrics-cluster
+    namespace: observability
+    valuesFile: values-vmcluster.yaml
+  - name: victoria-metrics-agent
+    repo: https://victoriametrics.github.io/helm-charts/
+    version: "0.30.0"
+    releaseName: victoria-metrics-agent
+    namespace: observability
+    valuesFile: values-vmagent.yaml
diff --git a/apps/overlays/au-syd1/observability/values-vmagent.yaml b/apps/overlays/au-syd1/observability/values-vmagent.yaml
new file mode 100644
index 0000000..50aa31a
--- /dev/null
+++ b/apps/overlays/au-syd1/observability/values-vmagent.yaml
@@ -0,0 +1,109 @@
+# Values for the victoria-metrics-agent Helm chart.
+image:
+  repository: victoriametrics/vmagent
+  pullPolicy: IfNotPresent
+podDisruptionBudget:
+  enabled: true
+  maxUnavailable: 1
+podAnnotations:
+  prometheus.io/scrape: "true"
+  # Must match the port in extraArgs.httpListenAddr.
+  prometheus.io/port: "8429"
+replicaCount: 3
+extraArgs:
+  envflag.enable: true
+  envflag.prefix: VM_
+  loggerFormat: json
+  httpListenAddr: :8429
+service:
+  enabled: true
+ingress:
+  enabled: true
+  annotations:
+    cert-manager.io/cluster-issuer: vault-issuer
+    cert-manager.io/common-name: vmagent.k8s.syd1.au.unkin.net
+    cert-manager.io/private-key-size: "4096"
+    external-dns.alpha.kubernetes.io/hostname: vmagent.k8s.syd1.au.unkin.net
+    external-dns.alpha.kubernetes.io/target: 198.18.200.0
+  hosts:
+    - name: vmagent.k8s.syd1.au.unkin.net
+      path:
+        - /
+      port: http
+  tls:
+    - hosts:
+        - vmagent.k8s.syd1.au.unkin.net
+      secretName: vmagent-tls
+  ingressClassName: nginx
+remoteWrite:
+  - url: http://victoria-metrics-cluster-vminsert.observability.svc.cluster.local:8480/insert/0/prometheus/
+# The chart reads scrape settings from `config`; the same
+# keys at the top level of this file are ignored.
+config:
+  global:
+    scrape_interval: 15s
+  scrape_configs:
+    - job_name: vmagent
+      static_configs:
+        - targets: ["localhost:8429"]
+    - job_name: "kubernetes-apiservers"
+      kubernetes_sd_configs:
+        - role: endpoints
+      scheme: https
+      tls_config:
+        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        # NOTE(review): this disables certificate verification even though
+        # ca_file is set (same in the node jobs below) - confirm intended.
+        insecure_skip_verify: true
+      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+      relabel_configs:
+        - source_labels:
+            - __meta_kubernetes_namespace
+            - __meta_kubernetes_service_name
+            - __meta_kubernetes_endpoint_port_name
+          action: keep
+          regex: default;kubernetes;https
+    - job_name: "kubernetes-nodes"
+      scheme: https
+      tls_config:
+        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        insecure_skip_verify: true
+      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+      kubernetes_sd_configs:
+        - role: node
+      relabel_configs:
+        - action: labelmap
+          regex: __meta_kubernetes_node_label_(.+)
+    - job_name: "kubernetes-nodes-cadvisor"
+      scheme: https
+      tls_config:
+        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        insecure_skip_verify: true
+      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+      kubernetes_sd_configs:
+        - role: node
+      metrics_path: /metrics/cadvisor
+      relabel_configs:
+        - action: labelmap
+          regex: __meta_kubernetes_node_label_(.+)
+        - source_labels: [__metrics_path__]
+          target_label: metrics_path
+      metric_relabel_configs:
+        - action: replace
+          source_labels: [pod]
+          regex: '(.+)'
+          target_label: pod_name
+          replacement: '${1}'
+        - action: replace
+          source_labels: [container]
+          regex: '(.+)'
+          target_label: container_name
+          replacement: '${1}'
+        - action: replace
+          target_label: name
+          replacement: k8s_stub
+        - action: replace
+          source_labels: [id]
+          regex: '^/system\.slice/(.+)\.service$'
+          target_label: systemd_service_name
+          replacement: '${1}'
diff --git a/apps/overlays/au-syd1/observability/values-vmcluster.yaml b/apps/overlays/au-syd1/observability/values-vmcluster.yaml
new file mode 100644
index 0000000..32a7e36
--- /dev/null
+++ b/apps/overlays/au-syd1/observability/values-vmcluster.yaml
@@ -0,0 +1,186 @@
+# Values for the victoria-metrics-cluster Helm chart (vmselect, vminsert, vmstorage).
+vmselect:
+  enabled: true
+  image:
+    repository: victoriametrics/vmselect
+    pullPolicy: IfNotPresent
+    variant: cluster
+  extraArgs:
+    envflag.enable: true
+    envflag.prefix: VM_
+    loggerFormat: json
+    httpListenAddr: :8481
+    dedup.minScrapeInterval: 15s
+    replicationFactor: 2
+  resources:
+    limits:
+      cpu: 500m
+      memory: 1024Mi
+    requests:
+      cpu: 50m
+      memory: 128Mi
+  horizontalPodAutoscaler:
+    enabled: true
+    maxReplicas: 10
+    minReplicas: 2
+    metrics:
+      - type: Resource
+        resource:
+          name: cpu
+          target:
+            type: Utilization
+            averageUtilization: 60
+    behavior:
+      scaleUp:
+        stabilizationWindowSeconds: 0
+        selectPolicy: Max
+        policies:
+          - type: Percent
+            value: 100
+            periodSeconds: 30
+          - type: Pods
+            value: 4
+            periodSeconds: 30
+      scaleDown:
+        stabilizationWindowSeconds: 300
+        selectPolicy: Min
+        policies:
+          - type: Percent
+            value: 10
+            periodSeconds: 60
+          - type: Pods
+            value: 2
+            periodSeconds: 60
+  podAnnotations:
+    prometheus.io/scrape: "true"
+    prometheus.io/port: "8481"
+  podDisruptionBudget:
+    enabled: true
+    maxUnavailable: 1
+  replicaCount: 2
+  ingress:
+    enabled: true
+    annotations:
+      cert-manager.io/cluster-issuer: vault-issuer
+      cert-manager.io/common-name: vmselect.k8s.syd1.au.unkin.net
+      cert-manager.io/private-key-size: "4096"
+      external-dns.alpha.kubernetes.io/hostname: vmselect.k8s.syd1.au.unkin.net
+      external-dns.alpha.kubernetes.io/target: 198.18.200.0
+    hosts:
+      - name: vmselect.k8s.syd1.au.unkin.net
+        path:
+          - /
+        port: http
+    tls:
+      - hosts:
+          - vmselect.k8s.syd1.au.unkin.net
+        secretName: vmselect-tls
+    ingressClassName: nginx
+
+vminsert:
+  enabled: true
+  image:
+    repository: victoriametrics/vminsert
+    variant: cluster
+    pullPolicy: IfNotPresent
+  extraArgs:
+    envflag.enable: true
+    envflag.prefix: VM_
+    loggerFormat: json
+    httpListenAddr: :8480
+    replicationFactor: 2
+  resources:
+    limits:
+      cpu: 500m
+      memory: 1024Mi
+    requests:
+      cpu: 50m
+      memory: 128Mi
+  horizontalPodAutoscaler:
+    enabled: true
+    maxReplicas: 10
+    minReplicas: 2
+    metrics:
+      - type: Resource
+        resource:
+          name: cpu
+          target:
+            type: Utilization
+            averageUtilization: 60
+    behavior:
+      scaleUp:
+        stabilizationWindowSeconds: 0
+        selectPolicy: Max
+        policies:
+          - type: Percent
+            value: 100
+            periodSeconds: 30
+          - type: Pods
+            value: 4
+            periodSeconds: 30
+      scaleDown:
+        stabilizationWindowSeconds: 300
+        selectPolicy: Min
+        policies:
+          - type: Percent
+            value: 10
+            periodSeconds: 60
+          - type: Pods
+            value: 2
+            periodSeconds: 60
+  podAnnotations:
+    prometheus.io/scrape: "true"
+    prometheus.io/port: "8480"
+  podDisruptionBudget:
+    enabled: true
+    maxUnavailable: 1
+  replicaCount: 2
+  ingress:
+    enabled: true
+    annotations:
+      cert-manager.io/cluster-issuer: vault-issuer
+      cert-manager.io/common-name: vminsert.k8s.syd1.au.unkin.net
+      cert-manager.io/private-key-size: "4096"
+      external-dns.alpha.kubernetes.io/hostname: vminsert.k8s.syd1.au.unkin.net
+      external-dns.alpha.kubernetes.io/target: 198.18.200.0
+      nginx.ingress.kubernetes.io/proxy-body-size: "100m"
+    hosts:
+      - name: vminsert.k8s.syd1.au.unkin.net
+        path:
+          - /
+        port: http
+    tls:
+      - hosts:
+          - vminsert.k8s.syd1.au.unkin.net
+        secretName: vminsert-tls
+    ingressClassName: nginx
+
+vmstorage:
+  enabled: true
+  image:
+    repository: victoriametrics/vmstorage
+    variant: cluster
+    pullPolicy: IfNotPresent
+  retentionPeriod: 180d
+  extraArgs:
+    envflag.enable: true
+    envflag.prefix: VM_
+    loggerFormat: json
+    httpListenAddr: :8482
+    dedup.minScrapeInterval: 15s
+  podAnnotations:
+    prometheus.io/scrape: "true"
+    prometheus.io/port: "8482"
+  podDisruptionBudget:
+    enabled: true
+    maxUnavailable: 1
+  persistentVolume:
+    enabled: true
+    name: vmstorage-volume
+    accessModes:
+      - ReadWriteOnce
+    storageClassName: cephrbd-fast-delete
+    mountPath: /storage
+    size: 200Gi
+  replicaCount: 3
+  podManagementPolicy: OrderedReady
diff --git a/argocd/applicationsets/kustomization.yaml b/argocd/applicationsets/kustomization.yaml
index 887442d..b8329a8 100644
--- a/argocd/applicationsets/kustomization.yaml
+++ b/argocd/applicationsets/kustomization.yaml
@@ -3,5 +3,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 
 resources:
+  - observability.yaml
   - platform.yaml
   - storage.yaml
diff --git a/argocd/applicationsets/observability.yaml b/argocd/applicationsets/observability.yaml
new file mode 100644
index 0000000..0b516bd
--- /dev/null
+++ b/argocd/applicationsets/observability.yaml
@@ -0,0 +1,34 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: ApplicationSet
+metadata:
+  name: observability-apps
+  namespace: argocd
+spec:
+  generators:
+    - git:
+        repoURL: https://git.unkin.net/unkin/argocd-apps
+        revision: HEAD
+        directories:
+          - path: apps/overlays/*/observability
+  template:
+    metadata:
+      # path segments: [0]=apps, [1]=overlays, [2]=<overlay dir>, [3]=observability.
+      # Name by overlay dir so each matched directory yields a unique Application.
+      name: 'observability-{{path[2]}}'
+    spec:
+      project: observability
+      source:
+        repoURL: https://git.unkin.net/unkin/argocd-apps
+        targetRevision: HEAD
+        path: '{{path}}'
+      destination:
+        server: https://kubernetes.default.svc
+        # path[3] is always "observability" for the directories matched above.
+        namespace: '{{path[3]}}'
+      syncPolicy:
+        automated:
+          prune: true
+          selfHeal: true
+        syncOptions:
+          - ServerSideApply=true
diff --git a/argocd/projects/kustomization.yaml b/argocd/projects/kustomization.yaml
index 887442d..b8329a8 100644
--- a/argocd/projects/kustomization.yaml
+++ b/argocd/projects/kustomization.yaml
@@ -3,5 +3,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 
 resources:
+  - observability.yaml
   - platform.yaml
   - storage.yaml
diff --git a/argocd/projects/observability.yaml b/argocd/projects/observability.yaml
new file mode 100644
index 0000000..5f31e53
--- /dev/null
+++ b/argocd/projects/observability.yaml
@@ -0,0 +1,24 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: AppProject
+metadata:
+  name: observability
+  namespace: argocd
+spec:
+  description: Observability stack (metrics, monitoring)
+  sourceRepos:
+    - https://git.unkin.net/unkin/argocd-apps
+    - https://victoriametrics.github.io/helm-charts/
+  destinations:
+    - namespace: 'observability'
+      server: https://kubernetes.default.svc
+  clusterResourceWhitelist:
+    - group: ''
+      kind: Namespace
+    - group: 'rbac.authorization.k8s.io'
+      kind: ClusterRole
+    - group: 'rbac.authorization.k8s.io'
+      kind: ClusterRoleBinding
+  namespaceResourceWhitelist:
+    - group: '*'
+      kind: '*'