From 3fb857fe4ce35abdf248974482cce7054be3819b Mon Sep 17 00:00:00 2001
From: Guillermo Facundo Colunga <guillermo.facundo.colunga@cern.ch>
Date: Wed, 27 Nov 2024 16:10:44 +0100
Subject: [PATCH] otel-metrics: enable remote write from prom to fb

Some users reported that the current implementation of the metrics flow
is causing errors in the fluent bit components that scrape the metrics
from the local prometheus and forward them to open telemetry.

This commit inverts the paradigm: prometheus is now the one doing the
remote write into the local fluent bit. After that, fluent bit does a
remote write into the monitoring infra, as it was doing previously.

The fluent bit input is exposed through a new headless service, and its
listen address, port, URI and buffer sizes are configurable under
metrics.fluentbit.prometheusRemoteWriteInputConfig. The number of fluent
bit replicas is configurable through metrics.fluentbit.replicas.

Reported-at: https://its.cern.ch/jira/browse/MONIT-4077
Signed-off-by: Guillermo Facundo Colunga <guillermo.facundo.colunga@gmail.com>
Signed-off-by: Guillermo Facundo Colunga <guillermo.facundo.colunga@cern.ch>
---
 templates/fluentbit-metrics/service.yaml     | 16 +++++++
 templates/fluentbit-metrics/statefulset.yaml |  2 +-
 templates/prometheus/prometheus.yaml         | 45 +++++++++++---------
 values.yaml                                  | 35 +++++++++------
 4 files changed, 64 insertions(+), 34 deletions(-)
 create mode 100644 templates/fluentbit-metrics/service.yaml

diff --git a/templates/fluentbit-metrics/service.yaml b/templates/fluentbit-metrics/service.yaml
new file mode 100644
index 0000000..194a684
--- /dev/null
+++ b/templates/fluentbit-metrics/service.yaml
@@ -0,0 +1,16 @@
+{{- if .Values.metrics.enabled -}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: it-monit-metrics-fluentbit
+  namespace: {{ .Release.Namespace }}
+  labels:
+    name: it-monit-metrics-collector-fluentbit
+spec:
+  clusterIP: None
+  ports:
+    - name: http
+      port: {{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.port }}
+  selector:
+    name: it-monit-metrics-collector-fluentbit
+{{- end -}}
diff --git a/templates/fluentbit-metrics/statefulset.yaml b/templates/fluentbit-metrics/statefulset.yaml
index 15f0f23..32eff77 100644
--- a/templates/fluentbit-metrics/statefulset.yaml
+++ b/templates/fluentbit-metrics/statefulset.yaml
@@ -8,7 +8,7 @@ spec:
   selector:
     matchLabels:
       name: it-monit-metrics-collector-fluentbit
-  replicas: 1
+  replicas: {{ .Values.metrics.fluentbit.replicas }}
   template:
     metadata:
       name: it-monit-metrics-collector-fluentbit
diff --git a/templates/prometheus/prometheus.yaml b/templates/prometheus/prometheus.yaml
index 65382e9..10b2573 100644
--- a/templates/prometheus/prometheus.yaml
+++ b/templates/prometheus/prometheus.yaml
@@ -37,24 +37,31 @@ spec:
   scrapeConfigNamespaceSelector: {}
   ruleSelector: {}
   ruleNamespaceSelector: {}
-  {{if .Values.metrics.prometheus.server.remoteWrite.endpoint }}
   remoteWrite:
-    - url: {{ .Values.metrics.prometheus.server.remoteWrite.endpoint }}
-      tlsConfig:
-        insecureSkipVerify: true
-      basicAuth:
-        username:
-          name: it-monit-metrics-collector-prometheus
-          key: username
-        password:
-          name: it-monit-metrics-collector-prometheus
-          key: password
-  {{ end }}
-  {{- if .Values.metrics.alertmanager.enabled }}
-  alerting:
-    alertmanagers:
-    - namespace: {{ .Release.Namespace }}
-      name: it-monit-alertmanager
-      port: http
-  {{- end }}
+    - url: "http://it-monit-metrics-fluentbit:{{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.port }}{{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.uri }}"
+      messageVersion: V2.0
+      queueConfig:
+        capacity: 5000
+        maxSamplesPerSend: 1000
+        batchSendDeadline: 5s
+      writeRelabelConfigs:
+        - sourceLabels: [__name__]
+          regex: 'temp.*'
+          action: drop
+        - regex: '(id|uuid)'
+          action: labeldrop
+      metadataConfig:
+        send: false
+    {{if .Values.metrics.prometheus.server.remoteWrite.endpoint }}
+    - url: {{ .Values.metrics.prometheus.server.remoteWrite.endpoint }}
+      tlsConfig:
+        insecureSkipVerify: true
+      basicAuth:
+        username:
+          name: it-monit-metrics-collector-prometheus
+          key: username
+        password:
+          name: it-monit-metrics-collector-prometheus
+          key: password
+    {{ end }}
 {{- end -}}
diff --git a/values.yaml b/values.yaml
index a49c9ee..c5e5dd6 100644
--- a/values.yaml
+++ b/values.yaml
@@ -125,6 +125,7 @@ metrics:
   fluentbit:
     # -- if true fluentbit metrics forwarder will be installed
     enabled: true
+    replicas: 2
     # If set it will override the metrics.defaultNodeSelector.
     nodeSelector: {}
     resources:
@@ -134,12 +135,16 @@ metrics:
       limits:
         cpu: "1"
         memory: "1Gi"
-
-    matchQuery: "match[]={job!=\"\"}"
-    # -- interval used by fluentbit to scrape metrics from prometheus
-    prometheusScrapeInterval: "60s"
-    # -- fluentbit buffer size. The more metrics to send the bigger needs to be
-    prometheusScrapeBufferMaxSize: "100M"
+    prometheusRemoteWriteInputConfig:
+      listen: "0.0.0.0"
+      port: 8080
+      bufferMaxSize: "2G"
+      bufferChunkSize: "128M"
+      successfulResponseCode: 201
+      tagFromUri: false
+      tag: "monit.prom.k8s"
+      uri: "/api/prom/push"
+      threaded: false
 
     # -- max size for in-disk storage for fluent-bit
     diskMaxCache: "5G"
@@ -163,14 +168,16 @@ metrics:
 
     # -- fluentbit inputs as a yaml list in a multiline string
     inputs: |
-      - name: prometheus_scrape
-        tag: monit.prom.k8s
-        host: prometheus-operated.{{ .Release.Namespace }}.svc.cluster.local
-        port: 9090
-        storage.type: filesystem
-        scrape_interval: {{ .Values.metrics.fluentbit.prometheusScrapeInterval }}
-        metrics_path: /federate?{{ .Values.metrics.fluentbit.matchQuery }}
-        buffer_max_size: {{ .Values.metrics.fluentbit.prometheusScrapeBufferMaxSize }}
+      - name: prometheus_remote_write
+        tag: {{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.tag }}
+        listen: {{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.listen }}
+        port: {{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.port }}
+        uri: {{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.uri }}
+        buffer_max_size: {{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.bufferMaxSize }}
+        buffer_chunk_size: {{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.bufferChunkSize }}
+        successful_response_code: {{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.successfulResponseCode }}
+        tag_from_uri: {{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.tagFromUri }}
+        threaded: {{ .Values.metrics.fluentbit.prometheusRemoteWriteInputConfig.threaded }}
 
     # -- fluentbit filters as a yaml list in a multiline string
     filters: |
-- 
GitLab