From ab36b6c3ab441827447be0bd15aae097272cbf09 Mon Sep 17 00:00:00 2001 From: Borja Garrido Bear <borja.garrido.bear@cern.ch> Date: Tue, 2 Jul 2024 10:43:37 +0200 Subject: [PATCH] [MONIT-3959] Add fluentbit forwarding for metrics. This commit adds a new component to forward metrics. A fluent-bit instance devoted to read metrics from the local prometheus and send them to wherever configured. By default this is used to send metrics to OTEL, but can be configured to have multiple flows. Author: Borja Garrido Bear <borja.garrido.bear@cern.ch> Signed-off-by: Borja Garrido Bear <borja.garrido.bear@cern.ch>, Guillermo Facundo Colunga <guillermo.facundo.colunga@cern.ch> --- README.md | 10 +++- .../clusterrole.yaml | 0 .../clusterrolebinding.yaml | 0 .../configmap.yaml | 0 .../daemonset.yaml | 0 .../serviceaccount.yaml | 0 templates/fluentbit-metrics/configmap.yaml | 26 ++++++++ templates/fluentbit-metrics/statefulset.yaml | 49 +++++++++++++++ values.yaml | 59 ++++++++++++++++--- 9 files changed, 136 insertions(+), 8 deletions(-) rename templates/{fluentbit => fluentbit-logs}/clusterrole.yaml (100%) rename templates/{fluentbit => fluentbit-logs}/clusterrolebinding.yaml (100%) rename templates/{fluentbit => fluentbit-logs}/configmap.yaml (100%) rename templates/{fluentbit => fluentbit-logs}/daemonset.yaml (100%) rename templates/{fluentbit => fluentbit-logs}/serviceaccount.yaml (100%) create mode 100644 templates/fluentbit-metrics/configmap.yaml create mode 100644 templates/fluentbit-metrics/statefulset.yaml diff --git a/README.md b/README.md index 734f08b..7856140 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,14 @@ Further customization can be achieved by exploring the `values.yaml` file from t Metrics collection is enabled by default. You can customize the settings for different components like `nodeExporter`, `metricsServer`, `kubeState`, `prometheusOperator`, and `prometheusServer`. 
+#### Fluentbit + +You can enable the Fluent Bit forwarder to OTLP in order to forward your metrics to MONIT (`.Values.metrics.fluentbit.enable`, `true` by default). +By default it scrapes your MONIT-managed Prometheus every 60 seconds and sends all the available metrics; if you only want to send a subset of them, please use the `.Values.metrics.fluentbit.matchQuery` parameter. +Take into account that the current limits are set for a small cluster; if you have more metrics to send, you might need to customise the input buffer and the memory and CPU limits of the forwarder. + +Please make sure you don't send your metrics twice (e.g. if you have already configured remote write in Prometheus). + ### Logs Logs collection is enabled by default using Fluent Bit. Customize Fluent Bit settings for `service`, `inputs`, `filters`, and `outputs`. @@ -48,4 +56,4 @@ Logs collection is enabled by default using Fluent Bit. Customize Fluent Bit set ## Support -If you encounter any issues or have questions, please open a ticket with the CERN IT Monitoring service. \ No newline at end of file +If you encounter any issues or have questions, please open a ticket with the CERN IT Monitoring service. 
diff --git a/templates/fluentbit/clusterrole.yaml b/templates/fluentbit-logs/clusterrole.yaml similarity index 100% rename from templates/fluentbit/clusterrole.yaml rename to templates/fluentbit-logs/clusterrole.yaml diff --git a/templates/fluentbit/clusterrolebinding.yaml b/templates/fluentbit-logs/clusterrolebinding.yaml similarity index 100% rename from templates/fluentbit/clusterrolebinding.yaml rename to templates/fluentbit-logs/clusterrolebinding.yaml diff --git a/templates/fluentbit/configmap.yaml b/templates/fluentbit-logs/configmap.yaml similarity index 100% rename from templates/fluentbit/configmap.yaml rename to templates/fluentbit-logs/configmap.yaml diff --git a/templates/fluentbit/daemonset.yaml b/templates/fluentbit-logs/daemonset.yaml similarity index 100% rename from templates/fluentbit/daemonset.yaml rename to templates/fluentbit-logs/daemonset.yaml diff --git a/templates/fluentbit/serviceaccount.yaml b/templates/fluentbit-logs/serviceaccount.yaml similarity index 100% rename from templates/fluentbit/serviceaccount.yaml rename to templates/fluentbit-logs/serviceaccount.yaml diff --git a/templates/fluentbit-metrics/configmap.yaml b/templates/fluentbit-metrics/configmap.yaml new file mode 100644 index 0000000..c8b051e --- /dev/null +++ b/templates/fluentbit-metrics/configmap.yaml @@ -0,0 +1,26 @@ +{{- if .Values.metrics.fluentbit.enable }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: it-monit-metrics-collector-fluentbit + namespace: {{ .Values.namespace }} + labels: + {{ toYaml .Values.commonLabels | nindent 4 }} +data: + fluent-bit.yaml: | + service: + {{- (tpl .Values.metrics.fluentbit.service $) | nindent 6}} + pipeline: + {{- if .Values.metrics.fluentbit.inputs }} + inputs: + {{- (tpl .Values.metrics.fluentbit.inputs $) | nindent 8}} + {{- end -}} + {{- if .Values.metrics.fluentbit.filters }} + filters: + {{- (tpl .Values.metrics.fluentbit.filters $) | nindent 8}} + {{- end -}} + {{- if .Values.metrics.fluentbit.outputs }} + outputs: + {{- 
(tpl .Values.metrics.fluentbit.outputs $) | nindent 8}} + {{- end -}} +{{- end }} diff --git a/templates/fluentbit-metrics/statefulset.yaml b/templates/fluentbit-metrics/statefulset.yaml new file mode 100644 index 0000000..0902e87 --- /dev/null +++ b/templates/fluentbit-metrics/statefulset.yaml @@ -0,0 +1,49 @@ +{{- if .Values.metrics.fluentbit.enable }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: it-monit-metrics-collector-fluentbit + namespace: {{ .Values.namespace }} + labels: + {{ toYaml .Values.commonLabels | nindent 4 }} +spec: + selector: + matchLabels: + name: it-monit-metrics-collector-fluentbit + replicas: 1 + template: + metadata: + name: it-monit-metrics-collector-fluentbit + namespace: {{ .Values.namespace }} + labels: + name: it-monit-metrics-collector-fluentbit + {{ toYaml .Values.commonLabels | nindent 8 }} + spec: + containers: + - name: it-monit-metrics-collector-fluentbit + image: fluent/fluent-bit:3.0.6 # Not valid for windows nodes. T.B.E. (To Be Explored) + command: [ "/fluent-bit/bin/fluent-bit" ] + args: + - --workdir=/fluent-bit/etc + - --config=/fluent-bit/etc/conf/fluent-bit.yaml + resources: + requests: + cpu: {{ .Values.metrics.fluentbit.resources.requests.cpu }} + memory: {{ .Values.metrics.fluentbit.resources.requests.memory }} + limits: + cpu: {{ .Values.metrics.fluentbit.resources.limits.cpu }} + memory: {{ .Values.metrics.fluentbit.resources.limits.memory }} + volumeMounts: + - name: config + mountPath: /fluent-bit/etc/conf + {{- if .Values.metrics.fluentbit.extraVolumeMounts }} + {{- toYaml .Values.metrics.fluentbit.extraVolumeMounts | nindent 6 }} + {{- end }} + volumes: + - name: config + configMap: + name: it-monit-metrics-collector-fluentbit + {{- if .Values.metrics.fluentbit.extraVolumes }} + {{- toYaml .Values.metrics.fluentbit.extraVolumes | nindent 4 }} + {{- end }} +{{- end }} diff --git a/values.yaml b/values.yaml index 416f772..14aecf6 100644 --- a/values.yaml +++ b/values.yaml @@ -1,9 +1,9 @@ 
-monitMetricsCollectionEndpoint: "https://monit-otlp.cern.ch" -monitMetricsCollectionPort: "" -monitLogsCollectionEndpoint: "https://monit-otlp.cern.ch" -monitLogsCollectionPort: "" -monitTracesCollectionEndpoint: "https://monit-otlp.cern.ch" -monitTracesCollectionPort: "" +monitMetricsCollectionEndpoint: "monit-otlp.cern.ch" +monitMetricsCollectionPort: 4319 +monitLogsCollectionEndpoint: "monit-otlp.cern.ch" +monitLogsCollectionPort: 4319 +monitTracesCollectionEndpoint: "monit-otlp.cern.ch" +monitTracesCollectionPort: 4319 # k8sClusterName is a label that will be added to all metrics and logs in the central monitoring. It is usefull to track the origin of the metrics and logs. k8sClusterName: "YOUR-CLUSTER-NAME" @@ -89,7 +89,52 @@ metrics: memory: "5Gi" # This is intended to be the future forwarding agent. - fluentbit: {} + fluentbit: + enable: true + resources: + requests: + cpu: "1" + memory: "150Mi" + limits: + cpu: "1" + memory: "500Mi" + + matchQuery: "match[]={job!=\"\"}" + prometheusScrapeInterval: "60s" + prometheusScrapeBufferMaxSize: "100M" + + service: | + daemon: off + flush: 1 + log_level: info + http_server: on + http_listen: 0.0.0.0 + http_port: 2020 + health_check: on + + inputs: | + - name: prometheus_scrape + tag: monit.prom.k8s + host: prometheus-operated.{{ .Values.namespace }}.svc.cluster.local + port: 9090 + scrape_interval: {{ .Values.metrics.fluentbit.prometheusScrapeInterval }} + metrics_path: /federate?{{ .Values.metrics.fluentbit.matchQuery }} + buffer_max_size: {{ .Values.metrics.fluentbit.prometheusScrapeBufferMaxSize }} + + filters: null + + outputs: | + - name: opentelemetry + match: monit.prom.k8s + host: {{ .Values.monitMetricsCollectionEndpoint }} + port: {{ .Values.monitMetricsCollectionPort }} + metrics_uri: /v1/metrics + logs_uri: /v1/logs + traces_uri: /v1/traces + tls: on + tls.verify: off # NOTE(review): certificate verification is disabled while credentials are sent; confirm this is intended + http_user: {{ .Values.tenantName | quote }} + http_passwd: {{ .Values.tenantPassword | quote }} logs: enable: true -- GitLab