From ffd79a5eeaa4840112e35e31528f1c4b11607de1 Mon Sep 17 00:00:00 2001 From: Guillermo Facundo Colunga <guillermo.facundo.colunga@cern.ch> Date: Thu, 11 Jul 2024 14:05:12 +0200 Subject: [PATCH] [MONIT-3967] Make sure helmchart is fully configurable As part of this commit the following additions have been done: * removed templates/namespace in favor of `.Release.namespace` * use `enabled` flags per component * add metrics server dependency (it was missing) * add to the readme file all default values with its description --- Chart.yaml | 10 +- README.md | 116 ++++++---- templates/api_server/servicemonitor.yaml | 6 +- templates/coredns/servicemonitor.yaml | 8 +- templates/ectd/servicemonitor.yaml | 6 +- templates/fluentbit-logs/clusterrole.yaml | 6 +- .../fluentbit-logs/clusterrolebinding.yaml | 8 +- templates/fluentbit-logs/configmap.yaml | 6 +- templates/fluentbit-logs/daemonset.yaml | 9 +- templates/fluentbit-logs/serviceaccount.yaml | 6 +- templates/fluentbit-metrics/configmap.yaml | 6 +- templates/fluentbit-metrics/statefulset.yaml | 9 +- templates/ingress_nginx/servicemonitor.yaml | 8 +- templates/kube_state/clusterrole.yaml | 7 +- templates/kube_state/clusterrolebinding.yaml | 7 +- templates/kube_state/deployment.yaml | 7 +- templates/kube_state/service.yaml | 5 +- templates/kube_state/serviceaccount.yaml | 5 +- templates/kube_state/servicemonitor.yaml | 8 +- templates/kubecontroller/servicemonitor.yaml | 8 +- templates/kubelet/servicemonitor.yaml | 8 +- templates/kubeproxy/servicemonitor.yaml | 8 +- templates/namespace.yaml | 6 - templates/node_exporter/daemonset.yaml | 6 +- templates/node_exporter/podmonitor.yaml | 10 +- templates/prometheus/clusterrole.yaml | 8 +- templates/prometheus/clusterrolebinding.yaml | 8 +- templates/prometheus/prometheus.yaml | 28 +-- templates/prometheus/remotewritesecret.yaml | 16 +- templates/prometheus/serviceaccount.yaml | 6 +- .../prometheus_operator/clusterrole.yaml | 8 +- .../clusterrolebinding.yaml | 8 +- 
templates/prometheus_operator/deployment.yaml | 16 +- templates/prometheus_operator/service.yaml | 6 +- .../prometheus_operator/serviceaccount.yaml | 6 +- templates/scheduler/servicemonitor.yaml | 6 +- values.yaml | 204 +++++++++++------- 37 files changed, 345 insertions(+), 264 deletions(-) delete mode 100644 templates/namespace.yaml diff --git a/Chart.yaml b/Chart.yaml index 8bd97d6..82470d5 100644 --- a/Chart.yaml +++ b/Chart.yaml @@ -3,10 +3,16 @@ name: cern-it-monitoring-kubernetes type: application appVersion: v0.1.0 version: 0.1.0 -kubeVersion: ">=1.21.0-0" +kubeVersion: ">=1.28.0-0" description: Helm Chart provided by IT Monitoring Service to install and configure required components to gather and send monitoring data from kubernetes clusters to central service. home: https://cern.ch/monitoring dependencies: - name: prometheus-operator-crds repository: https://prometheus-community.github.io/helm-charts - version: 11.0.0 \ No newline at end of file + version: 11.0.0 + condition: metrics.prometheus.enabled + - name: metrics-server + repository: https://kubernetes-sigs.github.io/metrics-server/ + version: 3.12.1 + alias: metricsserver + condition: metrics.metricsserver.enabled \ No newline at end of file diff --git a/README.md b/README.md index 7856140..0d3acb6 100644 --- a/README.md +++ b/README.md @@ -4,54 +4,90 @@ This Helm chart facilitates the deployment of a comprehensive monitoring solution for Kubernetes clusters at CERN. It enables the collection of metrics, logs, and, in the future, traces. The chart deploys and configures necessary components to gather and forward metrics and logs to the central monitoring system. 
-## Prerequisites -- Kubernetes cluster -- Helm 3+ -- Tenant credentials for the central monitoring service at CERN +## Requirements + +Kubernetes: `>=1.28.0-0` + +| Repository | Name | Version | +|------------|------|---------| +| https://kubernetes-sigs.github.io/metrics-server/ | metricsserver(metrics-server) | 3.12.1 | +| https://prometheus-community.github.io/helm-charts | prometheus-operator-crds | 11.0.0 | ## Installation -To install the Helm chart, use the following command: +To install the Helm chart in the monitoring namespace (creating it if it does not exist), use the following command: ```sh -helm install cern-it-monitoring-kubernetes ./path-to-your-chart --set k8sClusterName=<your-cluster-name> --set tenantName=<your-tenant-name> --set tenantPassword=<your-tenant-password> +helm install cern-it-monitoring-kubernetes ./path-to-your-chart --set kubernetes.clusterName=<your-cluster-name> --set tenant.name=<your-tenant-name> --set tenant.password=<your-tenant-password> -n monitoring --create-namespace ``` Replace `<your-cluster-name>`, `<your-tenant-name>`, and `<your-tenant-password>` with your desired values. -## Configuration - -The chart can be customized using the following parameters in the values.yaml file: - -- monitMetricsCollectionEndpoint: The endpoint for metrics collection. -- monitLogsCollectionEndpoint: The endpoint for logs collection. -- monitTracesCollectionEndpoint: The endpoint for traces collection. -- k8sClusterName: A label added to all metrics and logs to track their origin. -- tenantName: The tenant name for sending metrics and logs to central monitoring. -- tenantPassword: The tenant password for sending metrics and logs to central monitoring. -- commonLabels: Common labels applied to every resource created by the chart. -- namespace: The namespace for deploying all components. -- metrics: Configuration for metrics collection components. 
- - nodeExporter, metricsServer, kubeState, prometheusOperator, prometheusServer, fluentbit: Specific configurations for each component. -- logs: Configuration for logs collection. - - fluentbit: Specific configuration for Fluent Bit. - -Further customization can be achieved by exploring the `values.yaml` file from this repository and overriding any given key. - -### Metrics - -Metrics collection is enabled by default. You can customize the settings for different components like `nodeExporter`, `metricsServer`, `kubeState`, `prometheusOperator`, and `prometheusServer`. - -#### Fluentbit - -You can enable fluentbit forwarder to otlp in order to forward your metrics to MONIT (.Values.metrics.fluentbit.enable). -By default it will scrape your MONIT managed Prometheus every 60 seconds and send all the metrics available, if you just want to send a subset of them please us the .Values.metrics.fluentbit.matchQuery parameter. -Have into consideration that the current limits are set for a small cluster, if you have more metrics to send you might need to customise the buffer for the input and the memory and cpu limits for the forwarder. - -Please make sure you don't send your metrics twice (i.e if you have configured the remote write in Prometheus already). - -### Logs - -Logs collection is enabled by default using Fluent Bit. Customize Fluent Bit settings for `service`, `inputs`, `filters`, and `outputs`. +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| kubernetes.clusterName | string | `"nil"` | name of the kubernetes cluster to monitor. This value will be appended to every metric and log via k8s_cluster_name label | +| logs.enabled | bool | `false` | indicates if logs components should be enabled or not.
If set to false no logs component will be installed nor configured | +| logs.fluentbit.customParsers | string | `""` | | +| logs.fluentbit.enabled | bool | `false` | indicates if fluentbit logs component should be installed or not | +| logs.fluentbit.extraVolumeMounts | list | `[]` | | +| logs.fluentbit.extraVolumes | list | `[]` | | +| logs.fluentbit.filters | string | Kubernetes filter. See `values.yaml` file. | fluentbit filters as a yaml list in a multiline string | +| logs.fluentbit.inputs | string | Tail plugin over `/var/log/containers/*.log` files. See `values.yaml` file. | fluentbit inputs as a yaml list in a multiline string | +| logs.fluentbit.outputs | string | OpenTelemetry plugin using `otlp.endpoint`, `otlp.port`, `tenant.name` and `tenant.password`. See `values.yaml`. | fluentbit outputs as a yaml list in a multiline string | +| logs.fluentbit.resources.limits.cpu | string | `"20m"` | | +| logs.fluentbit.resources.limits.memory | string | `"25Mi"` | | +| logs.fluentbit.resources.requests.cpu | string | `"5m"` | | +| logs.fluentbit.resources.requests.memory | string | `"15Mi"` | | +| logs.fluentbit.scrapeInterval | string | `"15s"` | interval used by the local prometheus (if installed) to scrape metrics from logs fluentbits | +| logs.fluentbit.service | string | Daemon mode off listening on port 2020. See `values.yaml`. | fluentbit service configuration options in a multiline string | +| metrics.enabled | bool | `true` | indicates if all metrics components should be enabled or not. If set to false no metrics component will be installed nor configured | +| metrics.fluentbit.enabled | bool | `true` | if true fluentbit stateful set will be installed | +| metrics.fluentbit.filters | string | `"nil"` | fluentbit filters as a yaml list in a multiline string | +| metrics.fluentbit.inputs | string | Configuration to scrape local prometheus. See `values.yaml`.
| fluentbit inputs as a yaml list in a multiline string | +| metrics.fluentbit.matchQuery | string | `"match[]={job!=\"\"}"` | Query parameter to apply to the federate Prometheus URL; use this to filter and send only specific metrics | +| metrics.fluentbit.prometheusScrapeBufferMaxSize | string | `"100M"` | fluentbit buffer size. The more metrics to send, the bigger it needs to be | +| metrics.fluentbit.prometheusScrapeInterval | string | `"60s"` | interval used by fluentbit to scrape metrics from prometheus | +| metrics.fluentbit.resources.limits.cpu | string | `"1"` | | +| metrics.fluentbit.resources.limits.memory | string | `"500Mi"` | | +| metrics.fluentbit.resources.requests.cpu | string | `"1"` | | +| metrics.fluentbit.resources.requests.memory | string | `"150Mi"` | | +| metrics.fluentbit.service | string | Daemon mode off listening on port 2020. See `values.yaml`. | fluentbit service configuration options in a multiline string | +| metrics.kubeState.enabled | bool | `true` | if true kube state will be installed together with a service monitor | +| metrics.kubeState.resources.limits.cpu | string | `"20m"` | | +| metrics.kubeState.resources.limits.memory | string | `"25Mi"` | | +| metrics.kubeState.resources.requests.cpu | string | `"5m"` | | +| metrics.kubeState.resources.requests.memory | string | `"15Mi"` | | +| metrics.kubeState.scrapeInterval | string | `"15s"` | indicates how often kube state will be scraped by the local prometheus | +| metrics.metricsserver.enabled | bool | `true` | if true metrics server will be installed | +| metrics.metricsserver.resources.limits.cpu | string | `"100m"` | | +| metrics.metricsserver.resources.limits.memory | string | `"200Mi"` | | +| metrics.metricsserver.resources.requests.cpu | string | `"100m"` | | +| metrics.metricsserver.resources.requests.memory | string | `"200Mi"` | | +| metrics.nodeExporter.enabled | bool | `true` | if true node exporter will be installed as a daemon set together with a pod monitor | +|
metrics.nodeExporter.resources.limits.cpu | string | `"20m"` | | +| metrics.nodeExporter.resources.limits.memory | string | `"25Mi"` | | +| metrics.nodeExporter.resources.requests.cpu | string | `"5m"` | | +| metrics.nodeExporter.resources.requests.memory | string | `"15Mi"` | | +| metrics.nodeExporter.scrapeInterval | string | `"15s"` | indicates how often node exporter will be scraped by the local prometheus | +| metrics.prometheus.enabled | bool | `true` | if true prometheus operator and a prometheus server will be installed | +| metrics.prometheus.operator | object | Resources configuration. See `values.yaml`. | specific configuration for the prometheus operator | +| metrics.prometheus.server.extraLabelsForMetrics | list | `[]` | set of static labels and values to add to all the metrics gathered by the in-cluster prometheus when exported to central monitoring | +| metrics.prometheus.server.remoteWrite | object | `{}` | remote write prometheus configuration | +| metrics.prometheus.server.resources.limits.cpu | string | `"500m"` | | +| metrics.prometheus.server.resources.limits.memory | string | `"5Gi"` | | +| metrics.prometheus.server.resources.requests.cpu | string | `"100m"` | | +| metrics.prometheus.server.resources.requests.memory | string | `"2Gi"` | | +| metrics.prometheus.server.retention | string | `"24h"` | interval during which local cluster prometheus will store metrics | +| metrics.prometheus.server.scrapeInterval | string | `"10s"` | interval used to self scrape metrics | +| metrics.prometheus.server.scrapeTimeout | string | `"5s"` | timeout for self scraped metrics | +| metrics.prometheus.server.version | string | `"v2.50.0"` | prometheus version to use by the local cluster prometheus | +| otlp.endpoint | string | `"monit-otlp.cern.ch"` | otlp endpoint where the otlp receivers are listening | +| otlp.port | int | `4319` | otlp port where the otlp receivers are listening | +| tenant.name | string | `"nil"` | username used for authenticating in the
MONIT infrastructure | +| tenant.password | string | `"nil"` | password (plain) used for authenticating in the MONIT infrastructure | + +---------------------------------------------- ## Support diff --git a/templates/api_server/servicemonitor.yaml b/templates/api_server/servicemonitor.yaml index 3460f24..f94768e 100644 --- a/templates/api_server/servicemonitor.yaml +++ b/templates/api_server/servicemonitor.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: it-monit-metrics-servicemonitor-apiserver - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: endpoints: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token @@ -35,3 +34,4 @@ spec: matchLabels: component: apiserver provider: kubernetes +{{- end -}} \ No newline at end of file diff --git a/templates/coredns/servicemonitor.yaml b/templates/coredns/servicemonitor.yaml index 38687df..2c671e6 100644 --- a/templates/coredns/servicemonitor.yaml +++ b/templates/coredns/servicemonitor.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: it-monit-metrics-servicemonitor-coredns - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: endpoints: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token @@ -21,4 +20,5 @@ spec: selector: matchLabels: app.kubernetes.io/name: coredns - app.kubernetes.io/component: metrics \ No newline at end of file + app.kubernetes.io/component: metrics +{{- end -}} \ No newline at end of file diff --git a/templates/ectd/servicemonitor.yaml b/templates/ectd/servicemonitor.yaml index fe43a8f..adb38d6 100644 --- a/templates/ectd/servicemonitor.yaml +++
b/templates/ectd/servicemonitor.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: it-monit-metrics-servicemonitor-etcd - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: endpoints: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token @@ -22,3 +21,4 @@ spec: matchLabels: app: kube-prometheus-stack-kube-etcd release: cern-magnum +{{- end -}} \ No newline at end of file diff --git a/templates/fluentbit-logs/clusterrole.yaml b/templates/fluentbit-logs/clusterrole.yaml index 806d90a..054acdf 100644 --- a/templates/fluentbit-logs/clusterrole.yaml +++ b/templates/fluentbit-logs/clusterrole.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.logs.enabled .Values.logs.fluentbit.enabled -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: it-monit-logs-collector-fluentbit - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} rules: - apiGroups: - "" @@ -18,3 +17,4 @@ rules: - get - list - watch +{{- end -}} diff --git a/templates/fluentbit-logs/clusterrolebinding.yaml b/templates/fluentbit-logs/clusterrolebinding.yaml index 4eda1bb..477a200 100644 --- a/templates/fluentbit-logs/clusterrolebinding.yaml +++ b/templates/fluentbit-logs/clusterrolebinding.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.logs.enabled .Values.logs.fluentbit.enabled -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: it-monit-logs-collector-fluentbit - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -12,4 +11,5 @@ roleRef: subjects: - kind: ServiceAccount name: it-monit-logs-collector-fluentbit - namespace:
monitoring \ No newline at end of file + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/templates/fluentbit-logs/configmap.yaml b/templates/fluentbit-logs/configmap.yaml index 533e220..be00836 100644 --- a/templates/fluentbit-logs/configmap.yaml +++ b/templates/fluentbit-logs/configmap.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.logs.enabled .Values.logs.fluentbit.enabled -}} apiVersion: v1 kind: ConfigMap metadata: name: it-monit-logs-collector-fluentbit - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} data: custom_parsers.conf: | {{- (tpl .Values.logs.fluentbit.customParsers $) | nindent 4 }} @@ -13,3 +12,4 @@ data: {{- (tpl .Values.logs.fluentbit.inputs $) | nindent 4 }} {{- (tpl .Values.logs.fluentbit.filters $) | nindent 4 }} {{- (tpl .Values.logs.fluentbit.outputs $) | nindent 4 }} +{{- end -}} diff --git a/templates/fluentbit-logs/daemonset.yaml b/templates/fluentbit-logs/daemonset.yaml index 9f18bd9..5369b22 100644 --- a/templates/fluentbit-logs/daemonset.yaml +++ b/templates/fluentbit-logs/daemonset.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.logs.enabled .Values.logs.fluentbit.enabled -}} apiVersion: apps/v1 kind: DaemonSet metadata: name: it-monit-logs-collector-fluentbit - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: selector: matchLabels: @@ -12,10 +11,9 @@ spec: template: metadata: name: it-monit-logs-collector-fluentbit - namespace: {{ .Values.namespace }} + namespace: {{ .Release.Namespace }} labels: name: it-monit-logs-collector-fluentbit - {{ toYaml .Values.commonLabels | nindent 8 }} spec: serviceAccountName: it-monit-logs-collector-fluentbit containers: @@ -63,3 +61,4 @@ spec: {{- if .Values.logs.fluentbit.extraVolumes }} {{- toYaml .Values.logs.fluentbit.extraVolumes | nindent 4 }} {{- end }} +{{- end -}} diff --git
a/templates/fluentbit-logs/serviceaccount.yaml b/templates/fluentbit-logs/serviceaccount.yaml index fb1a73e..0a53d3f 100644 --- a/templates/fluentbit-logs/serviceaccount.yaml +++ b/templates/fluentbit-logs/serviceaccount.yaml @@ -1,7 +1,7 @@ +{{- if and .Values.logs.enabled .Values.logs.fluentbit.enabled -}} apiVersion: v1 kind: ServiceAccount metadata: name: it-monit-logs-collector-fluentbit - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} \ No newline at end of file + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/templates/fluentbit-metrics/configmap.yaml b/templates/fluentbit-metrics/configmap.yaml index c8b051e..6da50cf 100644 --- a/templates/fluentbit-metrics/configmap.yaml +++ b/templates/fluentbit-metrics/configmap.yaml @@ -1,11 +1,9 @@ -{{- if .Values.metrics.fluentbit.enable }} +{{- if and .Values.metrics.enabled .Values.metrics.fluentbit.enabled }} apiVersion: v1 kind: ConfigMap metadata: name: it-monit-metrics-collector-fluentbit - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} data: fluent-bit.yaml: service: diff --git a/templates/fluentbit-metrics/statefulset.yaml b/templates/fluentbit-metrics/statefulset.yaml index 0902e87..62208ba 100644 --- a/templates/fluentbit-metrics/statefulset.yaml +++ b/templates/fluentbit-metrics/statefulset.yaml @@ -1,11 +1,9 @@ -{{- if .Values.metrics.fluentbit.enable }} +{{- if and .Values.metrics.enabled .Values.metrics.fluentbit.enabled }} apiVersion: apps/v1 kind: StatefulSet metadata: name: it-monit-metrics-collector-fluentbit - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: selector: matchLabels: @@ -14,10 +12,9 @@ spec: template: metadata: name: it-monit-metrics-collector-fluentbit - namespace: {{ .Values.namespace }} + namespace: {{ .Release.Namespace }} labels: name:
it-monit-metrics-collector-fluentbit - {{ toYaml .Values.commonLabels | nindent 8 }} spec: containers: - name: it-monit-logs-collector-fluentbit diff --git a/templates/ingress_nginx/servicemonitor.yaml b/templates/ingress_nginx/servicemonitor.yaml index 46354ce..5d94bf2 100644 --- a/templates/ingress_nginx/servicemonitor.yaml +++ b/templates/ingress_nginx/servicemonitor.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: it-monit-metrics-servicemonitor-ingress-nginx - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: endpoints: - interval: 15s @@ -21,4 +20,5 @@ spec: matchLabels: app.kubernetes.io/component: controller app.kubernetes.io/instance: cern-magnum - app.kubernetes.io/name: ingress-nginx \ No newline at end of file + app.kubernetes.io/name: ingress-nginx +{{- end -}} diff --git a/templates/kube_state/clusterrole.yaml b/templates/kube_state/clusterrole.yaml index 734acad..7d710ad 100644 --- a/templates/kube_state/clusterrole.yaml +++ b/templates/kube_state/clusterrole.yaml @@ -1,11 +1,11 @@ +{{- if and .Values.metrics.enabled .Values.metrics.kubeState.enabled -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: it-monit-metrics-collector-kubestate - namespace: {{ .Values.namespace }} + namespace: {{ .Release.Namespace }} labels: app.kubernetes.io/name: it-monit-metrics-collector-kubestate - {{ toYaml .Values.commonLabels | nindent 4 }} rules: - apiGroups: - "" @@ -125,4 +125,5 @@ rules: - roles verbs: - list - - watch \ No newline at end of file + - watch +{{- end -}} diff --git a/templates/kube_state/clusterrolebinding.yaml b/templates/kube_state/clusterrolebinding.yaml index 140e954..5c6c3bf 100644 --- a/templates/kube_state/clusterrolebinding.yaml +++ b/templates/kube_state/clusterrolebinding.yaml @@ -1,11 +1,11 @@ +{{- if
and .Values.metrics.enabled .Values.metrics.kubeState.enabled -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: it-monit-metrics-collector-kubestate - namespace: {{ .Values.namespace }} + namespace: {{ .Release.Namespace }} labels: app.kubernetes.io/name: it-monit-metrics-collector-kubestate - {{ toYaml .Values.commonLabels | nindent 4 }} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -13,4 +13,5 @@ roleRef: subjects: - kind: ServiceAccount name: it-monit-metrics-collector-kubestate - namespace: {{ .Values.namespace }} \ No newline at end of file + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/templates/kube_state/deployment.yaml b/templates/kube_state/deployment.yaml index 77888db..b3f26e4 100644 --- a/templates/kube_state/deployment.yaml +++ b/templates/kube_state/deployment.yaml @@ -1,11 +1,11 @@ +{{- if and .Values.metrics.enabled .Values.metrics.kubeState.enabled -}} apiVersion: apps/v1 kind: Deployment metadata: name: it-monit-metrics-collector-kubestate - namespace: {{ .Values.namespace }} + namespace: {{ .Release.Namespace }} labels: app.kubernetes.io/name: it-monit-metrics-collector-kubestate - {{ toYaml .Values.commonLabels | nindent 4 }} spec: replicas: 1 selector: @@ -49,4 +49,5 @@ spec: type: RuntimeDefault nodeSelector: kubernetes.io/os: linux - serviceAccountName: it-monit-metrics-collector-kubestate \ No newline at end of file + serviceAccountName: it-monit-metrics-collector-kubestate +{{- end -}} diff --git a/templates/kube_state/service.yaml b/templates/kube_state/service.yaml index eab9258..d85fde1 100644 --- a/templates/kube_state/service.yaml +++ b/templates/kube_state/service.yaml @@ -1,11 +1,11 @@ +{{- if and .Values.metrics.enabled .Values.metrics.kubeState.enabled -}} apiVersion: v1 kind: Service metadata: name: it-monit-metrics-collector-kubestate - namespace: {{ .Values.namespace }} + namespace: {{ .Release.Namespace }} labels: app.kubernetes.io/name:
it-monit-metrics-collector-kubestate - {{ toYaml .Values.commonLabels | nindent 4 }} spec: clusterIP: None ports: @@ -17,3 +17,4 @@ spec: targetPort: telemetry selector: app.kubernetes.io/name: it-monit-metrics-collector-kubestate +{{- end -}} diff --git a/templates/kube_state/serviceaccount.yaml b/templates/kube_state/serviceaccount.yaml index c78d016..a3fb279 100644 --- a/templates/kube_state/serviceaccount.yaml +++ b/templates/kube_state/serviceaccount.yaml @@ -1,9 +1,10 @@ +{{- if and .Values.metrics.enabled .Values.metrics.kubeState.enabled -}} apiVersion: v1 automountServiceAccountToken: false kind: ServiceAccount metadata: name: it-monit-metrics-collector-kubestate - namespace: {{ .Values.namespace }} + namespace: {{ .Release.Namespace }} labels: app.kubernetes.io/name: it-monit-metrics-collector-kubestate - {{ toYaml .Values.commonLabels | nindent 4 }} \ No newline at end of file +{{- end -}} diff --git a/templates/kube_state/servicemonitor.yaml b/templates/kube_state/servicemonitor.yaml index 61bd572..fe078eb 100644 --- a/templates/kube_state/servicemonitor.yaml +++ b/templates/kube_state/servicemonitor.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.kubeState.enabled .Values.metrics.prometheus.enabled -}} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: it-monit-metrics-servicemonitor-kubestate - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: jobLabel: k8s-app @@ -21,4 +20,5 @@ spec: app.kubernetes.io/name: "it-monit-metrics-collector-kubestate" namespaceSelector: matchNames: - - {{ .Values.namespace }} \ No newline at end of file + - {{ .Release.Namespace }} +{{- end -}} diff --git a/templates/kubecontroller/servicemonitor.yaml b/templates/kubecontroller/servicemonitor.yaml index 6680cd9..4a12859 100644 --- a/templates/kubecontroller/servicemonitor.yaml +++ b/templates/kubecontroller/servicemonitor.yaml @@ -1,10 +1,9 @@ +{{- if and
.Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: it-monit-metrics-servicemonitor-kubecontroller - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: endpoints: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token @@ -21,4 +20,5 @@ spec: selector: matchLabels: app: kube-prometheus-stack-kube-controller-manager - release: cern-magnum \ No newline at end of file + release: cern-magnum +{{- end -}} diff --git a/templates/kubelet/servicemonitor.yaml b/templates/kubelet/servicemonitor.yaml index 543fade..d4c0e8b 100644 --- a/templates/kubelet/servicemonitor.yaml +++ b/templates/kubelet/servicemonitor.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: it-monit-metrics-servicemonitor-kubelet - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: jobLabel: k8s-app endpoints: @@ -27,4 +26,5 @@ spec: app.kubernetes.io/name: kubelet namespaceSelector: matchNames: - - kube-system \ No newline at end of file + - kube-system +{{- end -}} diff --git a/templates/kubeproxy/servicemonitor.yaml b/templates/kubeproxy/servicemonitor.yaml index 789cee5..d0e6e3d 100644 --- a/templates/kubeproxy/servicemonitor.yaml +++ b/templates/kubeproxy/servicemonitor.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: it-monit-metrics-servicemonitor-kubeproxy - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: endpoints: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token @@ -21,4 +20,5 @@
spec: selector: matchLabels: app: kube-prometheus-stack-kube-proxy - release: cern-magnum \ No newline at end of file + release: cern-magnum +{{- end -}} diff --git a/templates/namespace.yaml b/templates/namespace.yaml deleted file mode 100644 index 3482dd4..0000000 --- a/templates/namespace.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} \ No newline at end of file diff --git a/templates/node_exporter/daemonset.yaml b/templates/node_exporter/daemonset.yaml index 1b28a4d..066166b 100644 --- a/templates/node_exporter/daemonset.yaml +++ b/templates/node_exporter/daemonset.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.nodeExporter.enabled -}} apiVersion: apps/v1 kind: DaemonSet metadata: name: it-monit-metrics-collector-nodeexporter - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: selector: matchLabels: @@ -61,3 +60,4 @@ spec: - hostPath: path: / name: root +{{- end -}} diff --git a/templates/node_exporter/podmonitor.yaml b/templates/node_exporter/podmonitor.yaml index 404ae28..2e9684e 100644 --- a/templates/node_exporter/podmonitor.yaml +++ b/templates/node_exporter/podmonitor.yaml @@ -1,14 +1,13 @@ +{{- if and .Values.metrics.enabled .Values.metrics.nodeExporter.enabled .Values.metrics.prometheus.enabled -}} apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: name: it-monit-metrics-podmonitor-nodeexporter - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: namespaceSelector: matchNames: - - {{ .Values.namespace }} + - {{ .Release.Namespace }} selector: matchLabels: app.kubernetes.io/name: node-exporter @@ -18,4 +17,5 @@ spec: - action: replace sourceLabels: - __meta_kubernetes_pod_node_name - targetLabel: instance \ No newline at end
of file + targetLabel: instance +{{- end -}} diff --git a/templates/prometheus/clusterrole.yaml b/templates/prometheus/clusterrole.yaml index bde5da2..66a115b 100644 --- a/templates/prometheus/clusterrole.yaml +++ b/templates/prometheus/clusterrole.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: it-monit-metrics-collector-prometheus - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} rules: - apiGroups: [""] resources: @@ -24,4 +23,5 @@ rules: - ingresses verbs: ["get", "list", "watch"] - nonResourceURLs: ["/metrics"] - verbs: ["get"] \ No newline at end of file + verbs: ["get"] +{{- end -}} diff --git a/templates/prometheus/clusterrolebinding.yaml b/templates/prometheus/clusterrolebinding.yaml index c18eee4..6c0787d 100644 --- a/templates/prometheus/clusterrolebinding.yaml +++ b/templates/prometheus/clusterrolebinding.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: it-monit-metrics-collector-prometheus - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -12,4 +11,5 @@ roleRef: subjects: - kind: ServiceAccount name: it-monit-metrics-collector-prometheus - namespace: {{ .Values.namespace }} \ No newline at end of file + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/templates/prometheus/prometheus.yaml b/templates/prometheus/prometheus.yaml index 709ce2c..f4c2edf 100644 --- a/templates/prometheus/prometheus.yaml +++ b/templates/prometheus/prometheus.yaml @@ -1,25 +1,24 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion:
monitoring.coreos.com/v1 kind: Prometheus metadata: name: it-monit-metrics-collector-prometheus - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: - version: {{ .Values.metrics.prometheusServer.version }} - scrapeInterval: {{ .Values.metrics.prometheusServer.scrapeInterval }} - scrapeTimeout: {{ .Values.metrics.prometheusServer.scrapeTimeout }} - retention: {{ .Values.metrics.prometheusServer.retention }} + version: {{ .Values.metrics.prometheus.server.version }} + scrapeInterval: {{ .Values.metrics.prometheus.server.scrapeInterval }} + scrapeTimeout: {{ .Values.metrics.prometheus.server.scrapeTimeout }} + retention: {{ .Values.metrics.prometheus.server.retention }} externalLabels: k8s_cluster_name: {{ .Values.k8sClusterName }} serviceAccountName: it-monit-metrics-collector-prometheus resources: requests: - memory: {{ .Values.metrics.prometheusServer.resources.requests.memory }} - cpu: {{ .Values.metrics.prometheusServer.resources.requests.cpu }} + memory: {{ .Values.metrics.prometheus.server.resources.requests.memory }} + cpu: {{ .Values.metrics.prometheus.server.resources.requests.cpu }} limits: - memory: {{ .Values.metrics.prometheusServer.resources.limits.memory }} - cpu: {{ .Values.metrics.prometheusServer.resources.limits.cpu }} + memory: {{ .Values.metrics.prometheus.server.resources.limits.memory }} + cpu: {{ .Values.metrics.prometheus.server.resources.limits.cpu }} enableAdminAPI: false # An empty label selector matches all objects serviceMonitorSelector: {} @@ -30,9 +29,9 @@ spec: probeNamespaceSelector: {} scrapeConfigSelector: {} scrapeConfigNamespaceSelector: {} - {{if .Values.metrics.prometheusServer.remoteWrite.endpoint }} + {{if .Values.metrics.prometheus.server.remoteWrite.endpoint }} remoteWrite: - - url: {{ .Values.metrics.prometheusServer.remoteWrite.endpoint }} + - url: {{ .Values.metrics.prometheus.server.remoteWrite.endpoint }} tlsConfig:
insecureSkipVerify: true basicAuth: @@ -42,4 +41,5 @@ spec: password: name: it-monit-metrics-collector-prometheus key: password - {{ end }} \ No newline at end of file + {{ end }} +{{- end -}} diff --git a/templates/prometheus/remotewritesecret.yaml b/templates/prometheus/remotewritesecret.yaml index ed673cd..5c156cc 100644 --- a/templates/prometheus/remotewritesecret.yaml +++ b/templates/prometheus/remotewritesecret.yaml @@ -1,18 +1,18 @@ -{{ if .Values.metrics.prometheusServer.remoteWrite.endpoint }} +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} +{{ if .Values.metrics.prometheus.server.remoteWrite.endpoint }} apiVersion: v1 kind: Secret metadata: name: it-monit-metrics-collector-prometheus - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} type: kubernetes.io/basic-auth data: -{{- if and .Values.metrics.prometheusServer.remoteWrite.username .Values.metrics.prometheusServer.remoteWrite.password }} - username: {{ .Values.metrics.prometheusServer.remoteWrite.username | b64enc }} - password: {{ .Values.metrics.prometheusServer.remoteWrite.password | b64enc }} +{{- if and .Values.metrics.prometheus.server.remoteWrite.username .Values.metrics.prometheus.server.remoteWrite.password }} + username: {{ .Values.metrics.prometheus.server.remoteWrite.username | b64enc }} + password: {{ .Values.metrics.prometheus.server.remoteWrite.password | b64enc }} {{- else }} username: {{ .Values.tenantName | b64enc }} password: {{ .Values.tenantPassword | b64enc }} {{- end }} -{{ end }} \ No newline at end of file +{{ end }} +{{- end -}} diff --git a/templates/prometheus/serviceaccount.yaml b/templates/prometheus/serviceaccount.yaml index dfc5f73..65a4aba 100644 --- a/templates/prometheus/serviceaccount.yaml +++ b/templates/prometheus/serviceaccount.yaml @@ -1,7 +1,7 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: v1 kind: ServiceAccount
metadata: name: it-monit-metrics-collector-prometheus - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/templates/prometheus_operator/clusterrole.yaml b/templates/prometheus_operator/clusterrole.yaml index a555b69..ce26ced 100644 --- a/templates/prometheus_operator/clusterrole.yaml +++ b/templates/prometheus_operator/clusterrole.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: it-monit-metrics-operator-prometheus - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} rules: - apiGroups: - monitoring.coreos.com @@ -95,4 +94,5 @@ rules: resources: - storageclasses verbs: - - get \ No newline at end of file + - get +{{- end -}} diff --git a/templates/prometheus_operator/clusterrolebinding.yaml b/templates/prometheus_operator/clusterrolebinding.yaml index ff79241..e2a2760 100644 --- a/templates/prometheus_operator/clusterrolebinding.yaml +++ b/templates/prometheus_operator/clusterrolebinding.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: it-monit-metrics-operator-prometheus - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -12,4 +11,5 @@ roleRef: subjects: - kind: ServiceAccount name: it-monit-metrics-operator-prometheus - namespace: {{ .Values.namespace }} \ No newline at end of file + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/templates/prometheus_operator/deployment.yaml b/templates/prometheus_operator/deployment.yaml index f933661..558fe9e 100644 ---
a/templates/prometheus_operator/deployment.yaml +++ b/templates/prometheus_operator/deployment.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: apps/v1 kind: Deployment metadata: name: it-monit-metrics-operator-prometheus - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: replicas: 1 selector: @@ -32,11 +31,11 @@ spec: name: http resources: limits: - cpu: {{ .Values.metrics.prometheusOperator.resources.limits.cpu }} - memory: {{ .Values.metrics.prometheusOperator.resources.limits.memory }} + cpu: {{ .Values.metrics.prometheus.operator.resources.limits.cpu }} + memory: {{ .Values.metrics.prometheus.operator.resources.limits.memory }} requests: - cpu: {{ .Values.metrics.prometheusOperator.resources.requests.cpu }} - memory: {{ .Values.metrics.prometheusOperator.resources.requests.memory }} + cpu: {{ .Values.metrics.prometheus.operator.resources.requests.cpu }} + memory: {{ .Values.metrics.prometheus.operator.resources.requests.memory }} securityContext: allowPrivilegeEscalation: false capabilities: @@ -50,4 +49,5 @@ spec: runAsUser: 65534 seccompProfile: type: RuntimeDefault - serviceAccountName: it-monit-metrics-operator-prometheus \ No newline at end of file + serviceAccountName: it-monit-metrics-operator-prometheus +{{- end -}} diff --git a/templates/prometheus_operator/service.yaml b/templates/prometheus_operator/service.yaml index 268c8de..a7d3162 100644 --- a/templates/prometheus_operator/service.yaml +++ b/templates/prometheus_operator/service.yaml @@ -1,10 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: v1 kind: Service metadata: name: it-monit-metrics-operator-prometheus - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} + namespace: {{ .Release.Namespace }} spec: clusterIP: None ports: @@ -14,3 +13,4 @@ spec: selector:
app.kubernetes.io/component: controller app.kubernetes.io/name: it-monit-metrics-operator-prometheus +{{- end -}} diff --git a/templates/prometheus_operator/serviceaccount.yaml b/templates/prometheus_operator/serviceaccount.yaml index 25200bb..2e83b24 100644 --- a/templates/prometheus_operator/serviceaccount.yaml +++ b/templates/prometheus_operator/serviceaccount.yaml @@ -1,8 +1,8 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: v1 automountServiceAccountToken: false kind: ServiceAccount metadata: name: it-monit-metrics-operator-prometheus - namespace: {{ .Values.namespace }} - labels: - {{ toYaml .Values.commonLabels | nindent 4 }} \ No newline at end of file + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/templates/scheduler/servicemonitor.yaml b/templates/scheduler/servicemonitor.yaml index cd75947..0d272dd 100644 --- a/templates/scheduler/servicemonitor.yaml +++ b/templates/scheduler/servicemonitor.yaml @@ -1,8 +1,9 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheus.enabled -}} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: servicemonitor-scheduler - namespace: {{ .Values.namespace }} + namespace: {{ .Release.Namespace }} spec: endpoints: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token @@ -19,4 +20,5 @@ spec: selector: matchLabels: app: kube-prometheus-stack-kube-scheduler - release: cern-magnum \ No newline at end of file + release: cern-magnum +{{- end -}} diff --git a/values.yaml b/values.yaml index 14aecf6..0d00795 100644 --- a/values.yaml +++ b/values.yaml @@ -1,31 +1,35 @@ -monitMetricsCollectionEndpoint: "monit-otlp.cern.ch" -monitMetricsCollectionPort: 4319 -monitLogsCollectionEndpoint: "monit-otlp.cern.ch" -monitLogsCollectionPort: 4319 -monitTracesCollectionEndpoint: "monit-otlp.cern.ch" -monitTracesCollectionPort: 4319 - -# k8sClusterName is a label that will be added to all metrics and logs in the central monitoring.
It is usefull to track the origin of the metrics and logs. -k8sClusterName: "YOUR-CLUSTER-NAME" -# tenantName is the name of the tenant that you need to use to send your metrics and logs to the central monitoring. If you do not have one open a ticket to the monitoring service. -tenantName: "YOUR-TENANT-NAME" -# tenantName is the password of the tenant that you need to use to send your metrics and logs to the central monitoring. If you do not have one open a ticket to the monitoring service. -tenantPassword: "YOUR-TENANT-PASSWORD" - -# commonLabels is a set of labels and values that will be added to every resource created by this helm chart. -commonLabels: - helm.sh/chart: "cern-it-monitoring-kubernetes-0-1-0" - app.kubernetes.io/managed-by: "Helm" - app.kubernetes.io/instance: "cern-it-monitoring-kubernetes-base" - app.kubernetes.io/version: "0.1.0" - -# Namespace where all the components will be deployed. -namespace: monitoring +# OTLP default configuration. +otlp: + # -- otlp endpoint where the otlp receivers are listening + endpoint: "monit-otlp.cern.ch" + # -- otlp port where the otlp receivers are listening + port: 4319 +# Tenant configuration. Username and Password are provided via CERN Central IT +# Monitoring service. +tenant: + # -- username used for authenticating in the MONIT infrastructure + name: null + # -- password (plain) used for authenticating in the MONIT infrastructure + password: null + +# Kubernetes configuration. +kubernetes: + # -- name of the kubernetes cluster to monitor. This value will be added to every metric and log via the k8s_cluster_name label + clusterName: null + +# The metrics section includes all the components meant to produce, scrape, +# collect or forward metrics. You can configure all components independently. metrics: - enable: true + # -- indicates if all metrics components should be enabled or not.
If set to false no metrics component will be installed nor configured + enabled: true + # Node exporter is used to scrape node resources metrics like cpu, memory + # or network. nodeExporter: + # -- if true node exporter will be installed as a daemon set together with a pod monitor + enabled: true + # -- indicates how often node exporter will be scraped by the local prometheus scrapeInterval: "15s" resources: requests: @@ -34,18 +38,25 @@ metrics: limits: cpu: "20m" memory: "25Mi" - - metricsServer: - scrapeInterval: "15s" + + # Metrics server specific configuration. + metricsserver: + # -- if true metrics server will be installed + enabled: true resources: requests: - cpu: "5m" - memory: "15Mi" + cpu: "100m" + memory: "200Mi" limits: - cpu: "20m" - memory: "25Mi" + cpu: "100m" + memory: "200Mi" + # Kube state is used to scrape metrics from kubernetes api, like limits + # and resources. kubeState: + # -- if true kube state will be installed together with a service monitor + enabled: true + # -- indicates how often kube state will be scraped by the local prometheus scrapeInterval: "15s" resources: requests: @@ -55,41 +66,51 @@ metrics: cpu: "20m" memory: "25Mi" - prometheusOperator: - resources: - requests: - cpu: "5m" - memory: "25Mi" - limits: - cpu: "100m" - memory: "100Mi" - - prometheusServer: - # version is the prometheus server image tag that will be used for the local prometheus server running in the cluster. - version: "v2.50.0" - # scrapeInterval is the default interval that the local cluster promtheus will use to scrape its targets. - scrapeInterval: "10s" - # scrapeTimeout is the default timeout that the local cluster promtheus will use when scraping its targets. - scrapeTimeout: "5s" - # retention is the retention period that the in-cluster prometheus will hold the metrics. Useful in case central monitoring is not available to get metrics from there.
- retention: "24h" - # extraLabelsForMetrics is a set of static labels and values to add to all the metrics gathered by the in-cluster prometheus when exported to central monitoring. - extraLabelsForMetrics: [] - # remoteWriteEnable indicates wether prometheus will be configured to remote write to the given endpoint or not. - remoteWrite: {} - # endpoint: "https://monit-prom-mom.cern.ch:9090/api/v1/write" - # username: "your user" # If user and password are not provided then tenantName and tenantPassword will be used. - # password: "your password" - resources: - requests: - cpu: "100m" - memory: "2Gi" - limits: - cpu: "500m" - memory: "5Gi" + # Prometheus operator is used to deploy and configure the prometheus that will + # scrape and forward the metrics from the cluster. + prometheus: + # -- if true prometheus operator and a prometheus server will be installed + enabled: true + # -- specific configuration for the prometheus operator + operator: + resources: + requests: + cpu: "5m" + memory: "25Mi" + limits: + cpu: "100m" + memory: "100Mi" + server: + # -- prometheus version to use by the local cluster prometheus + version: "v2.50.0" + # -- interval used to self scrape metrics + scrapeInterval: "10s" + # -- timeout for self scraped metrics + scrapeTimeout: "5s" + # -- interval during which local cluster prometheus will store metrics + retention: "24h" + # -- set of static labels and values to add to all the metrics gathered by the in-cluster prometheus when exported to central monitoring + extraLabelsForMetrics: [] + # -- remote write prometheus configuration + remoteWrite: {} + # endpoint: "https://monit-prom-mom.cern.ch:9090/api/v1/write" + # username: "your user" # If username and password are not provided then + # the tenant credentials will be used. + # password: "your password" + resources: + requests: + cpu: "100m" + memory: "2Gi" + limits: + cpu: "500m" + memory: "5Gi" - # This is intended to be the future forwarding agent.
+ # This fluentbit scrapes metrics from the local prometheus and forwards + # them to the OpenTelemetry Collector. + # If .Values.metrics.prometheus.enabled is false there is no local + # prometheus to scrape from; provide different inputs in that case. fluentbit: + # -- if true the fluentbit metrics forwarder will be installed enable: true resources: requests: @@ -100,9 +121,12 @@ metrics: memory: "500Mi" matchQuery: "match[]={job!=\"\"}" + # -- interval used by fluentbit to scrape metrics from prometheus prometheusScrapeInterval: "60s" + # -- fluentbit buffer size. The more metrics to send, the bigger it needs to be prometheusScrapeBufferMaxSize: "100M" + # -- fluentbit service configuration options in a multiline string service: | daemon: off flush: 1 @@ -112,35 +136,45 @@ metrics: http_port: 2020 health_check: on + # -- fluentbit inputs as a yaml list in a multiline string inputs: | - name: prometheus_scrape tag: monit.prom.k8s - host: prometheus-operated.{{ .Values.namespace }}.svc.cluster.local + host: prometheus-operated.{{ .Release.Namespace }}.svc.cluster.local port: 9090 scrape_interval: {{ .Values.metrics.fluentbit.prometheusScrapeInterval }} metrics_path: /federate?{{ .Values.metrics.fluentbit.matchQuery }} buffer_max_size: {{ .Values.metrics.fluentbit.prometheusScrapeBufferMaxSize }} - filters: null + # -- fluentbit filters as a yaml list in a multiline string + filters: null + # -- fluentbit outputs as a yaml list in a multiline string outputs: | - name: opentelemetry match: monit.prom.k8s - host: {{ .Values.monitMetricsCollectionEndpoint }} - port: {{ .Values.monitMetricsCollectionPort }} + host: {{ .Values.otlp.endpoint }} + port: {{ .Values.otlp.port }} metrics_uri: /v1/metrics logs_uri: /v1/logs traces_uri: /v1/traces tls: on tls.verify: off - http_user: {{ .Values.tenantName }} - http_passwd: {{ .Values.tenantPassword }} + http_user: {{ .Values.tenant.name }} + http_passwd: {{ .Values.tenant.password }} logs:
- enable: true + # -- indicates if logs components should be enabled or not. If set to false no logs component will be installed nor configured + enabled: false + # Fluentbit is deployed as a daemon set to gather logs. Be careful on big + # deployments (100+) as the fluentbit kubernetes filter will produce requests + # to the kubernetes API. fluentbit: - # scrapeInterval is the interval that the local prometheus server will use to scrape the node exporters. + # -- indicates if fluentbit logs component should be installed or not + enabled: false + + # -- interval used by the local prometheus (if installed) to scrape metrics from logs fluentbits scrapeInterval: "15s" resources: requests: @@ -150,6 +184,7 @@ logs: cpu: "20m" memory: "25Mi" + # -- fluentbit service configuration options in a multiline string service: | [SERVICE] Daemon Off @@ -162,6 +197,7 @@ logs: HTTP_Port 2020 Health_Check On + # -- fluentbit inputs in classic configuration format in a multiline string inputs: | [INPUT] Name tail @@ -171,6 +207,7 @@ logs: Mem_Buf_Limit 20MB Skip_Long_Lines Off + # -- fluentbit filters in classic configuration format in a multiline string filters: | [FILTER] Name kubernetes @@ -201,18 +238,25 @@ logs: Nested_under kubernetes_labels Add_prefix kubernetes_labels_ + # -- fluentbit outputs in classic configuration format in a multiline string outputs: | [OUTPUT] - name loki + name opentelemetry match * - labels job=kubernetes,kubernertes_cluster_name={{ .Values.k8sClusterName }} - host monit-loki-mom.cern.ch - port 443 - tls on - http_user {{ .Values.tenantName }} - http_passwd {{ .Values.tenantPassword }} - line_format json + labels job=kubernetes,k8s_cluster_name={{ .Values.kubernetes.clusterName }} + host {{ .Values.otlp.endpoint }} + port {{ .Values.otlp.port }} + metrics_uri /v1/metrics + logs_uri /v1/logs + traces_uri /v1/traces + tls on + tls.verify off + http_user {{ .Values.tenant.name }} + http_passwd {{ .Values.tenant.password }} + # -- fluentbit custom parsers customParsers: "" + # -- extra
volumes meant to be used in the fluentbits, can be used to collect logs from pvcs extraVolumes: [] + # -- extra volumes to mount in the fluentbits, can be used to collect logs from pvcs extraVolumeMounts: [] -- GitLab