diff --git a/.helmignore b/.helmignore
new file mode 100644
index 0000000000000000000000000000000000000000..0e8a0eb36f4ca2c939201c0d54b5d82a1ea34778
--- /dev/null
+++ b/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/Chart.yaml b/Chart.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8bd97d6f0999ab26155096404221dd8a37261e8c
--- /dev/null
+++ b/Chart.yaml
@@ -0,0 +1,12 @@
+apiVersion: v2
+name: cern-it-monitoring-kubernetes
+type: application
+appVersion: v0.1.0
+version: 0.1.0
+kubeVersion: ">=1.21.0-0"
+description: Helm Chart provided by IT Monitoring Service to install and configure required components to gather and send monitoring data from kubernetes clusters to central service.
+home: https://cern.ch/monitoring
+dependencies:
+  - name: prometheus-operator-crds
+    repository: https://prometheus-community.github.io/helm-charts
+    version: 11.0.0
\ No newline at end of file
diff --git a/templates/kubeState.yaml b/templates/kubeState.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4d5bdd734a20b15b980a57269277d15442023863
--- /dev/null
+++ b/templates/kubeState.yaml
@@ -0,0 +1,230 @@
+# kube-state-metrics: ServiceAccount, cluster-wide list/watch RBAC, Deployment
+# and headless Service exposing the http-metrics (8080) and telemetry (8081) ports.
+apiVersion: v1
+automountServiceAccountToken: false
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/name: kube-state-metrics
+  name: kube-state-metrics
+  namespace: monitoring
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: kube-state-metrics
+  name: kube-state-metrics
+  namespace: monitoring
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  - secrets
+  - nodes
+  - pods
+  - services
+  - serviceaccounts
+  - resourcequotas
+  - replicationcontrollers
+  - limitranges
+  - persistentvolumeclaims
+  - persistentvolumes
+  - namespaces
+  - endpoints
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - apps
+  resources:
+  - statefulsets
+  - daemonsets
+  - deployments
+  - replicasets
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - batch
+  resources:
+  - cronjobs
+  - jobs
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - autoscaling
+  resources:
+  - horizontalpodautoscalers
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
+- apiGroups:
+  - policy
+  resources:
+  - poddisruptionbudgets
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - certificates.k8s.io
+  resources:
+  - certificatesigningrequests
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - discovery.k8s.io
+  resources:
+  - endpointslices
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - storage.k8s.io
+  resources:
+  - storageclasses
+  - volumeattachments
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - admissionregistration.k8s.io
+  resources:
+  - mutatingwebhookconfigurations
+  - validatingwebhookconfigurations
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - networking.k8s.io
+  resources:
+  - networkpolicies
+  - ingressclasses
+  - ingresses
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - rbac.authorization.k8s.io
+  resources:
+  - clusterrolebindings
+  - clusterroles
+  - rolebindings
+  - roles
+  verbs:
+  - list
+  - watch
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/name: kube-state-metrics
+  name: kube-state-metrics
+  namespace: monitoring
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: kube-state-metrics
+subjects:
+- kind: ServiceAccount
+  name: kube-state-metrics
+  namespace: monitoring
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app.kubernetes.io/name: kube-state-metrics
+  name: kube-state-metrics
+  namespace: monitoring
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: kube-state-metrics
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: kube-state-metrics
+    spec:
+      automountServiceAccountToken: true
+      containers:
+      - image: {{ .Values.kubeState.image | quote }}
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 5
+          timeoutSeconds: 5
+        name: kube-state-metrics
+        ports:
+        - containerPort: 8080
+          name: http-metrics
+        - containerPort: 8081
+          name: telemetry
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 8081
+          initialDelaySeconds: 5
+          timeoutSeconds: 5
+        # Requests/limits come from kubeState.resources in values.yaml; omitted when unset.
+        {{- with .Values.kubeState.resources }}
+        resources:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - ALL
+          readOnlyRootFilesystem: true
+          runAsNonRoot: true
+          runAsUser: 65534
+          seccompProfile:
+            type: RuntimeDefault
+      nodeSelector:
+        kubernetes.io/os: linux
+      serviceAccountName: kube-state-metrics
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app.kubernetes.io/name: kube-state-metrics
+  name: kube-state-metrics
+  namespace: monitoring
+spec:
+  clusterIP: None
+  ports:
+  - name: http-metrics
+    port: 8080
+    targetPort: http-metrics
+  - name: telemetry
+    port: 8081
+    targetPort: telemetry
+  selector:
+    app.kubernetes.io/name: kube-state-metrics
diff --git a/templates/namespace.yaml b/templates/namespace.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d32523606f28187cc65fbb56387a78011a1e9425
--- /dev/null
+++ b/templates/namespace.yaml
@@ -0,0 +1,5 @@
+# Namespace referenced by the namespaced resources of this chart.
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: monitoring
diff --git a/templates/nodeExporter.yaml b/templates/nodeExporter.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3860977237e74396f875a7ef4dc597e1eed54c38
--- /dev/null
+++ b/templates/nodeExporter.yaml
@@ -0,0 +1,78 @@
+# node-exporter: DaemonSet exposing host metrics on container port 9100.
+# NOTE(review): no RoleBinding/ClusterRoleBinding visible in this chart refers
+# to the allow-get-metrics ClusterRole below - confirm it is still needed.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: allow-get-metrics
+  namespace: monitoring
+rules:
+  - nonResourceURLs:
+      - "/metrics"
+    verbs:
+      - get
+---
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  labels:
+    app.kubernetes.io/component: exporter
+    app.kubernetes.io/name: node-exporter
+  name: node-exporter
+  namespace: monitoring
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: exporter
+      app.kubernetes.io/name: node-exporter
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/component: exporter
+        app.kubernetes.io/name: node-exporter
+    spec:
+      tolerations:
+      - operator: Exists
+      containers:
+      - args:
+        - --path.sysfs=/host/sys
+        - --path.rootfs=/host/root
+        - --web.disable-exporter-metrics
+        - --collector.disable-defaults
+        - --collector.os
+        - --collector.cpu
+        - --collector.meminfo
+        - --collector.diskstats
+        - --collector.filesystem
+        - --collector.netstat
+        - --collector.netclass
+        - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
+        - --collector.netclass.ignored-devices=^(veth.*)$
+        name: node-exporter
+        image: {{ .Values.nodeExporter.image | quote }}  # pinned via values.yaml instead of an untagged image
+        ports:
+        - containerPort: 9100
+          protocol: TCP
+        resources:
+          requests:
+            cpu: {{ .Values.nodeExporter.resources.requests.cpu | quote }}
+            memory: {{ .Values.nodeExporter.resources.requests.memory | quote }}
+          limits:
+            cpu: {{ .Values.nodeExporter.resources.limits.cpu | quote }}
+            memory: {{ .Values.nodeExporter.resources.limits.memory | quote }}
+        volumeMounts:
+        - mountPath: /host/sys
+          mountPropagation: HostToContainer
+          name: sys
+          readOnly: true
+        - mountPath: /host/root
+          mountPropagation: HostToContainer
+          name: root
+          readOnly: true
+      volumes:
+      - hostPath:
+          path: /sys
+        name: sys
+      - hostPath:
+          path: /
+        name: root
diff --git a/templates/prometheus.yaml b/templates/prometheus.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5300a8406dc52c22d77ed1de9a2ec4f73fb376d8
--- /dev/null
+++ b/templates/prometheus.yaml
@@ -0,0 +1,289 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: monit-prom-mom-auth
+  namespace: monitoring
+type: kubernetes.io/basic-auth
+data:  # values placed under "data" must already be base64-encoded
+  username: {{ .Values.prometheus.remoteWrite.username | quote }}
+  password: {{ .Values.prometheus.remoteWrite.password | quote }}
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: prometheus
+  namespace: monitoring
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: prometheus
+  namespace: monitoring
+rules:
+- apiGroups: [""]
+  resources:
+  - nodes
+  - nodes/metrics
+  - services
+  - endpoints
+  - pods
+  verbs: ["get", "list", "watch"]
+- apiGroups: [""]
+  resources:
+  - configmaps
+  verbs: ["get"]
+- apiGroups:
+  - networking.k8s.io
+  resources:
+  - ingresses
+  verbs: ["get", "list", "watch"]
+- nonResourceURLs: ["/metrics"]
+  verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: prometheus
+  namespace: monitoring
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: prometheus
+subjects:
+- kind: ServiceAccount
+  name: prometheus
+  namespace: monitoring
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/component: controller
+    app.kubernetes.io/name: monit-prometheus-operator
+    app.kubernetes.io/version: 0.72.0
+  name: monit-prometheus-operator
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: monit-prometheus-operator
+subjects:
+- kind: ServiceAccount
+  name: monit-prometheus-operator
+  namespace: monitoring
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: monit-prometheus-operator
+  name: monit-prometheus-operator
+rules:
+- apiGroups:
+  - monitoring.coreos.com
+  resources:
+  - alertmanagers
+  - alertmanagers/finalizers
+  - alertmanagers/status
+  - alertmanagerconfigs
+  - prometheuses
+  - prometheuses/finalizers
+  - prometheuses/status
+  - prometheusagents
+  - prometheusagents/finalizers
+  - prometheusagents/status
+  - thanosrulers
+  - thanosrulers/finalizers
+  - thanosrulers/status
+  - scrapeconfigs
+  - servicemonitors
+  - podmonitors
+  - probes
+  - prometheusrules
+  verbs:
+  - '*'
+- apiGroups:
+  - apps
+  resources:
+  - statefulsets
+  verbs:
+  - '*'
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  - secrets
+  verbs:
+  - '*'
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - list
+  - delete
+- apiGroups:
+  - ""
+  resources:
+  - services
+  - services/finalizers
+  - endpoints
+  verbs:
+  - get
+  - create
+  - update
+  - delete
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - namespaces
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - patch
+  - create
+- apiGroups:
+  - networking.k8s.io
+  resources:
+  - ingresses
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - storage.k8s.io
+  resources:
+  - storageclasses
+  verbs:
+  - get
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app.kubernetes.io/name: monit-prometheus-operator
+  name: monit-prometheus-operator
+  namespace: monitoring
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: monit-prometheus-operator
+  template:
+    metadata:
+      annotations:
+        kubectl.kubernetes.io/default-container: monit-prometheus-operator
+      labels:
+        app.kubernetes.io/name: monit-prometheus-operator
+    spec:
+      automountServiceAccountToken: true
+      containers:
+      - args:
+        - --kubelet-service=kube-system/kubelet
+        - --prometheus-config-reloader={{ .Values.prometheus.operator.configReloadImage }}
+        env:
+        - name: GOGC
+          value: "30"
+        image: {{ .Values.prometheus.operator.image | quote }}
+        name: monit-prometheus-operator
+        ports:
+        - containerPort: 8080
+          name: http
+        resources:
+          limits:
+            cpu: {{ .Values.prometheus.operator.resources.limits.cpu | quote }}
+            memory: {{ .Values.prometheus.operator.resources.limits.memory | quote }}
+          requests:
+            cpu: {{ .Values.prometheus.operator.resources.requests.cpu | quote }}
+            memory: {{ .Values.prometheus.operator.resources.requests.memory | quote }}
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - ALL
+          readOnlyRootFilesystem: true
+      nodeSelector:
+        kubernetes.io/os: linux
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 65534
+        seccompProfile:
+          type: RuntimeDefault
+      serviceAccountName: monit-prometheus-operator
+---
+apiVersion: v1
+automountServiceAccountToken: false
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/name: monit-prometheus-operator
+  name: monit-prometheus-operator
+  namespace: monitoring
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app.kubernetes.io/name: monit-prometheus-operator
+  name: monit-prometheus-operator
+  namespace: monitoring
+spec:
+  clusterIP: None
+  ports:
+  - name: http
+    port: 8080
+    targetPort: http
+  selector:
+    # Select only on the label the operator Deployment's pod template sets.
+    app.kubernetes.io/name: monit-prometheus-operator
+---
+apiVersion: monitoring.coreos.com/v1
+kind: Prometheus
+metadata:
+  name: monit-prometheus-server
+  namespace: monitoring
+spec:
+  version: {{ .Values.prometheus.version | quote }}
+  scrapeInterval: {{ .Values.prometheus.scrapeInterval | quote }}
+  scrapeTimeout: {{ .Values.prometheus.scrapeTimeout | quote }}
+  retention: {{ .Values.prometheus.retention | quote }}
+  externalLabels:
+    k8s_cluster_name: {{ .Values.prometheus.externalLabels.k8sClusterName | quote }}
+  serviceAccountName: prometheus
+  resources:
+    requests:
+      memory: {{ .Values.prometheus.resources.requests.memory | quote }}
+      cpu: {{ .Values.prometheus.resources.requests.cpu | quote }}
+    limits:
+      memory: {{ .Values.prometheus.resources.limits.memory | quote }}
+      cpu: {{ .Values.prometheus.resources.limits.cpu | quote }}
+  enableAdminAPI: false
+  serviceMonitorSelector:
+    matchLabels:
+      monit.cern.ch/flow: it-monitoring
+  podMonitorSelector:
+    matchLabels:
+      monit.cern.ch/flow: it-monitoring
+  remoteWrite:
+  - url: {{ .Values.prometheus.remoteWrite.url | quote }}
+    tlsConfig:
+      insecureSkipVerify: true  # NOTE(review): remote-write server certificate is not verified
+    basicAuth:
+      username:
+        name: monit-prom-mom-auth
+        key: username
+      password:
+        name: monit-prom-mom-auth
+        key: password
diff --git a/templates/prometheusConfigs.yaml b/templates/prometheusConfigs.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..68431089709a7002e0ff27336de62664dc146907
--- /dev/null
+++ b/templates/prometheusConfigs.yaml
@@ -0,0 +1,77 @@
+# Scrape definitions: a PodMonitor for node-exporter and ServiceMonitors for
+# kube-state-metrics and the kubelet, all labelled monit.cern.ch/flow: it-monitoring.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: podmonitor-node-exporter
+  namespace: monitoring
+  labels:
+    monit.cern.ch/flow: it-monitoring
+spec:
+  namespaceSelector:
+    matchNames:
+    - monitoring
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: node-exporter
+  podMetricsEndpoints:
+  - targetPort: 9100
+    relabelings:
+    - action: replace
+      sourceLabels:
+      - __meta_kubernetes_pod_node_name
+      targetLabel: instance
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: servicemonitor-kube-state-metrics
+  namespace: monitoring
+  labels:
+    monit.cern.ch/flow: it-monitoring
+spec:
+  jobLabel: k8s-app
+  endpoints:
+  - port: http-metrics
+    scheme: http
+    interval: 30s
+  - port: telemetry
+    scheme: http
+    interval: 30s
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: kube-state-metrics
+  namespaceSelector:
+    matchNames:
+    - monitoring
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: servicemonitor-kubelet
+  namespace: monitoring
+  labels:
+    monit.cern.ch/flow: it-monitoring
+spec:
+  jobLabel: k8s-app
+  endpoints:
+  - port: https-metrics
+    scheme: https
+    interval: 30s
+    tlsConfig:
+      insecureSkipVerify: true  # NOTE(review): kubelet certificate is not verified
+    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+  - port: https-metrics
+    scheme: https
+    path: /metrics/cadvisor
+    interval: 30s
+    honorLabels: true
+    tlsConfig:
+      insecureSkipVerify: true  # NOTE(review): kubelet certificate is not verified
+    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: kubelet
+  namespaceSelector:
+    matchNames:
+    - kube-system
diff --git a/values.yaml b/values.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..60e48a152ac30e2fec7d8346be0f0c7b96a31a79
--- /dev/null
+++ b/values.yaml
@@ -0,0 +1,66 @@
+# Components
+# - Node Exporter
+# - Kube State Metrics
+# - Prometheus Operator
+# - Prometheus server (managed by the operator)
+
+nodeExporter:
+  image: prom/node-exporter:v1.7.0
+  # NOTE(review): scrapeInterval is not referenced by the templates visible in
+  # this chart - confirm before relying on it.
+  scrapeInterval: 15s
+  resources:
+    requests:
+      cpu: 5m
+      memory: 15Mi
+    limits:
+      cpu: 20m
+      memory: 25Mi
+
+kubeState:
+  image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.11.0
+  # NOTE(review): scrapeInterval is not referenced by the templates visible in
+  # this chart - confirm before relying on it.
+  scrapeInterval: 15s
+  resources:
+    requests:
+      cpu: 0.1
+      memory: 100Mi
+    limits:
+      cpu: 0.4
+      memory: 400Mi
+
+prometheus:
+  # version, scrapeInterval, scrapeTimeout and retention are rendered into the
+  # spec of the Prometheus custom resource in templates/prometheus.yaml.
+  version: v2.50.0
+  scrapeInterval: 10s
+  scrapeTimeout: 5s
+  retention: 24h
+  externalLabels:
+    k8sClusterName: yourClusterName
+  remoteWrite:
+    url: "https://monit-prom-mom.cern.ch:9090/api/v1/write"
+    # username and password are copied as-is into the "data" field of the
+    # monit-prom-mom-auth Secret, so both must be base64-encoded. Supply real
+    # credentials at install time instead of committing them here.
+    username: yourTeanantName
+    password: yourPasswordHereInBase64
+  resources:
+    requests:
+      cpu: 100m
+      memory: 2Gi
+    limits:
+      cpu: 500m
+      memory: 5Gi
+
+  operator:
+    image: quay.io/prometheus-operator/prometheus-operator:v0.72.0
+    configReloadImage: quay.io/prometheus-operator/prometheus-config-reloader:v0.72.0
+    resources:
+      requests:
+        cpu: 5m
+        memory: 25Mi
+      limits:
+        cpu: 100m
+        memory: 100Mi
\ No newline at end of file