Try the monitoring stack from flux

This commit is contained in:
2023-10-28 02:28:33 +01:00
parent e5d10dc333
commit 0d61ac43a7
15 changed files with 3758 additions and 0 deletions

View File

@@ -0,0 +1,34 @@
---
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: monitoring-controllers
namespace: flux-system
spec:
interval: 1h
retryInterval: 2m
timeout: 10m
prune: true
wait: true
sourceRef:
kind: GitRepository
name: flux-system
path: ./monitoring/controllers
---
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: monitoring-configs
namespace: flux-system
spec:
dependsOn:
- name: monitoring-controllers
interval: 1h
retryInterval: 2m
timeout: 5m
prune: true
wait: true
sourceRef:
kind: GitRepository
name: flux-system
path: ./monitoring/configs

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,332 @@
{
"__inputs": [
{
"name": "DS_LOKI",
"label": "Loki",
"description": "",
"type": "datasource",
"pluginId": "loki",
"pluginName": "Loki"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
},
{
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"iconColor": "red",
"name": "flux events",
"target": {
"limit": 100,
"matchAny": false,
"tags": [
"flux"
],
"type": "tags"
}
}
]
},
"description": "Flux logs collected from Kubernetes, stored in Loki",
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 29,
"iteration": 1653748775696,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": "${DS_LOKI}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": "${DS_LOKI}",
"expr": "sum(count_over_time({namespace=~\"$namespace\", stream=~\"$stream\", app =~\"$controller\"} | json | __error__!=\"JSONParserErr\" | level=~\"$level\" |= \"$query\" [$__interval]))",
"instant": false,
"legendFormat": "Log count",
"range": true,
"refId": "A"
}
],
"type": "timeseries"
},
{
"datasource": "${DS_LOKI}",
"description": "Logs from services running in Kubernetes",
"gridPos": {
"h": 25,
"w": 24,
"x": 0,
"y": 4
},
"id": 2,
"options": {
"dedupStrategy": "numbers",
"enableLogDetails": false,
"prettifyLogMessage": true,
"showCommonLabels": false,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": "${DS_LOKI}",
"expr": "{namespace=~\"$namespace\", stream=~\"$stream\", app =~\"$controller\"} | json | __error__!=\"JSONParserErr\" | level=~\"$level\" |= \"$query\"",
"refId": "A"
}
],
"type": "logs"
}
],
"refresh": "10s",
"schemaVersion": 36,
"style": "light",
"tags": [
"flux"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "",
"value": ""
},
"description": "String to search for",
"hide": 0,
"label": "Search Query",
"name": "query",
"options": [
{
"selected": true,
"text": "",
"value": ""
}
],
"query": "",
"skipUrlSync": false,
"type": "textbox"
},
{
"allValue": "info|error",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"hide": 0,
"includeAll": true,
"multi": false,
"name": "level",
"options": [
{
"selected": true,
"text": "All",
"value": "$__all"
},
{
"selected": false,
"text": "info",
"value": "info"
},
{
"selected": false,
"text": "error",
"value": "error"
}
],
"query": "info,error",
"queryValue": "",
"skipUrlSync": false,
"type": "custom"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": "${DS_LOKI}",
"definition": "label_values(app)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "controller",
"options": [],
"query": "label_values(app)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": [
"flux-system"
],
"value": [
"flux-system"
]
},
"datasource": "${DS_LOKI}",
"definition": "label_values(namespace)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "namespace",
"options": [],
"query": "label_values(namespace)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": ".+",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": "${DS_LOKI}",
"definition": "label_values(stream)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "stream",
"options": [],
"query": "label_values(stream)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": false,
"text": "Loki",
"value": "Loki"
},
"hide": 0,
"includeAll": false,
"label": "Datasource",
"multi": false,
"name": "DS_LOKI",
"options": [],
"query": "loki",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Flux Logs",
"uid": "flux-logs",
"version": 2
}

View File

@@ -0,0 +1,16 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
- podmonitor.yaml
configMapGenerator:
- name: flux-grafana-dashboards
files:
- dashboards/control-plane.json
- dashboards/cluster.json
- dashboards/logs.json
options:
labels:
grafana_dashboard: "1"
app.kubernetes.io/part-of: flux
app.kubernetes.io/component: monitoring

View File

@@ -0,0 +1,29 @@
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: flux-system
labels:
app.kubernetes.io/part-of: flux
app.kubernetes.io/component: monitoring
spec:
namespaceSelector:
matchNames:
- flux-system
selector:
matchExpressions:
- key: app
operator: In
values:
- helm-controller
- source-controller
- kustomize-controller
- notification-controller
- image-automation-controller
- image-reflector-controller
podMetricsEndpoints:
- port: http-prom
relabelings:
# https://github.com/prometheus-operator/prometheus-operator/issues/4816
- sourceLabels: [__meta_kubernetes_pod_phase]
action: keep
regex: Running

View File

@@ -0,0 +1,274 @@
kube-state-metrics:
collectors: [ ]
extraArgs:
- --custom-resource-state-only=true
rbac:
extraRules:
- apiGroups:
- source.toolkit.fluxcd.io
- kustomize.toolkit.fluxcd.io
- helm.toolkit.fluxcd.io
- notification.toolkit.fluxcd.io
- image.toolkit.fluxcd.io
resources:
- gitrepositories
- buckets
- helmrepositories
- helmcharts
- ocirepositories
- kustomizations
- helmreleases
- alerts
- providers
- receivers
- imagerepositories
- imagepolicies
- imageupdateautomations
verbs: [ "list", "watch" ]
customResourceState:
enabled: true
config:
spec:
resources:
- groupVersionKind:
group: kustomize.toolkit.fluxcd.io
version: v1
kind: Kustomization
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
revision: [ status, lastAppliedRevision ]
source_name: [ spec, sourceRef, name ]
- groupVersionKind:
group: helm.toolkit.fluxcd.io
version: v2beta1
kind: HelmRelease
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
revision: [ status, lastAppliedRevision ]
chart_name: [ spec, chart, spec, chart ]
chart_source_name: [ spec, chart, spec, sourceRef, name ]
- groupVersionKind:
group: source.toolkit.fluxcd.io
version: v1
kind: GitRepository
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
revision: [ status, artifact, revision ]
url: [ spec, url ]
- groupVersionKind:
group: source.toolkit.fluxcd.io
version: v1beta2
kind: Bucket
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
revision: [ status, artifact, revision ]
endpoint: [ spec, endpoint ]
bucket_name: [ spec, bucketName ]
- groupVersionKind:
group: source.toolkit.fluxcd.io
version: v1beta2
kind: HelmRepository
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
revision: [ status, artifact, revision ]
url: [ spec, url ]
- groupVersionKind:
group: source.toolkit.fluxcd.io
version: v1beta2
kind: HelmChart
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
revision: [ status, artifact, revision ]
chart_name: [ spec, chart ]
chart_version: [ spec, version ]
- groupVersionKind:
group: source.toolkit.fluxcd.io
version: v1beta2
kind: OCIRepository
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
revision: [ status, artifact, revision ]
url: [ spec, url ]
- groupVersionKind:
group: notification.toolkit.fluxcd.io
version: v1beta2
kind: Alert
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
- groupVersionKind:
group: notification.toolkit.fluxcd.io
version: v1beta2
kind: Provider
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
- groupVersionKind:
group: notification.toolkit.fluxcd.io
version: v1
kind: Receiver
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
webhook_path: [ status, webhookPath ]
- groupVersionKind:
group: image.toolkit.fluxcd.io
version: v1beta2
kind: ImageRepository
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
image: [ spec, image ]
- groupVersionKind:
group: image.toolkit.fluxcd.io
version: v1beta2
kind: ImagePolicy
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
source_name: [ spec, imageRepositoryRef, name ]
- groupVersionKind:
group: image.toolkit.fluxcd.io
version: v1beta1
kind: ImageUpdateAutomation
metricNamePrefix: gotk
metrics:
- name: "resource_info"
help: "The current state of a GitOps Toolkit resource."
each:
type: Info
info:
labelsFromPath:
name: [ metadata, name ]
labelsFromPath:
exported_namespace: [ metadata, namespace ]
ready: [ status, conditions, "[type=Ready]", status ]
suspended: [ spec, suspend ]
source_name: [ spec, sourceRef, name ]

View File

@@ -0,0 +1,17 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
- namespace.yaml
- repository.yaml
- release.yaml
configMapGenerator:
- name: flux-kube-state-metrics-config
files:
- kube-state-metrics-config.yaml
options:
labels:
app.kubernetes.io/part-of: flux
app.kubernetes.io/component: monitoring
configurations:
- kustomizeconfig.yaml

View File

@@ -0,0 +1,6 @@
nameReference:
- kind: ConfigMap
version: v1
fieldSpecs:
- path: spec/valuesFrom/name
kind: HelmRelease

View File

@@ -0,0 +1,6 @@
apiVersion: v1
kind: Namespace
metadata:
name: monitoring
labels:
app.kubernetes.io/component: monitoring

View File

@@ -0,0 +1,58 @@
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: kube-prometheus-stack
spec:
interval: 1h
chart:
spec:
version: "48.x"
chart: kube-prometheus-stack
sourceRef:
kind: HelmRepository
name: prometheus-community
interval: 1h
install:
crds: Create
upgrade:
crds: CreateReplace
valuesFrom:
- kind: ConfigMap
name: flux-kube-state-metrics-config
valuesKey: kube-state-metrics-config.yaml
# https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml
values:
alertmanager:
enabled: false
prometheus:
prometheusSpec:
retention: 24h
resources:
requests:
cpu: 200m
memory: 200Mi
podMonitorNamespaceSelector: { }
podMonitorSelector:
matchLabels:
app.kubernetes.io/component: monitoring
grafana:
defaultDashboardsEnabled: false
adminPassword: flux
postRenderers:
- kustomize:
patches:
- target:
# Ignore these objects from Flux diff as they are mutated from chart hooks
kind: (ValidatingWebhookConfiguration|MutatingWebhookConfiguration)
name: kube-prometheus-stack-admission
patch: |
- op: add
path: /metadata/annotations/helm.toolkit.fluxcd.io~1driftDetection
value: disabled
- target:
# Ignore these objects from Flux diff as they are mutated at apply time but not at dry-run time
kind: PrometheusRule
patch: |
- op: add
path: /metadata/annotations/helm.toolkit.fluxcd.io~1driftDetection
value: disabled

View File

@@ -0,0 +1,8 @@
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
name: prometheus-community
spec:
interval: 12h
type: oci
url: oci://ghcr.io/prometheus-community/charts

View File

@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
- repository.yaml
- release.yaml

View File

@@ -0,0 +1,34 @@
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: loki-stack
spec:
interval: 5m
dependsOn:
- name: kube-prometheus-stack
chart:
spec:
version: "2.x"
chart: loki-stack
sourceRef:
kind: HelmRepository
name: grafana-charts
interval: 60m
# https://github.com/grafana/helm-charts/blob/main/charts/loki-stack/values.yaml
# https://github.com/grafana/loki/blob/main/production/helm/loki/values.yaml
values:
promtail:
enabled: true
loki:
enabled: true
isDefault: false
serviceMonitor:
enabled: true
additionalLabels:
app.kubernetes.io/part-of: kube-prometheus-stack
config:
chunk_store_config:
max_look_back_period: 0s
table_manager:
retention_deletes_enabled: true
retention_period: 12h

View File

@@ -0,0 +1,7 @@
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
name: grafana-charts
spec:
interval: 120m0s
url: https://grafana.github.io/helm-charts