streaming/kafka-confluent/manifests/03-prometheus-metrics/my-cluster.yaml

# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
apiVersion: platform.confluent.io/v1beta1
kind: Kafka
metadata:
  name: my-cluster
spec:
  replicas: 3
  tls:
    autoGeneratedCerts: true
  image:
    application: confluentinc/cp-server:7.4.0
    init: confluentinc/confluent-init-container:2.6.0
  dataVolumeCapacity: 100Gi
  storageClass:
    name: premium-rwo
  configOverrides:
    server:
      - offsets.topic.replication.factor=3
      - transaction.state.log.replication.factor=3
      - transaction.state.log.min.isr=2
      - default.replication.factor=3
      - min.insync.replicas=2
      - auto.create.topics.enable=true
  listeners:
    custom:
      - name: tls
        port: 9093
        tls:
          enabled: true
  podTemplate:
    tolerations:
      - key: "app.stateful/component"
        operator: "Equal"
        value: "kafka-broker"
        effect: NoSchedule
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 1
            preference:
              matchExpressions:
                - key: "app.stateful/component"
                  operator: In
                  values:
                    - "kafka-broker"
    topologySpreadConstraints:
      - maxSkew: 1
        topologyKey: "topology.kubernetes.io/zone"
        whenUnsatisfiable: DoNotSchedule
        labelSelector:
          matchLabels:
            app: my-cluster
            clusterId: kafka
            platform.confluent.io/type: kafka
    envVars:
      - name: KAFKA_HEAP_OPTS
        value: "-Xmx4G -Xms4G"
    resources:
      requests:
        memory: 5Gi
        cpu: "1"
      limits:
        memory: 5Gi
        cpu: "2"
    probe:
      readiness:
        failureThreshold: 15
  metricReporter:
    enabled: true
  metrics:
    prometheus:
      rules:
        # Special cases and very specific rules
        - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value
          name: kafka_server_$1_$2
          type: GAUGE
          labels:
            clientId: "$3"
            topic: "$4"
            partition: "$5"
        - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value
          name: kafka_server_$1_$2
          type: GAUGE
          labels:
            clientId: "$3"
            broker: "$4:$5"
        - pattern: kafka.server<type=(.+), cipher=(.+), protocol=(.+), listener=(.+), networkProcessor=(.+)><>connections
          name: kafka_server_$1_connections_tls_info
          type: GAUGE
          labels:
            cipher: "$2"
            protocol: "$3"
            listener: "$4"
            networkProcessor: "$5"
        - pattern: kafka.server<type=(.+), clientSoftwareName=(.+), clientSoftwareVersion=(.+), listener=(.+), networkProcessor=(.+)><>connections
          name: kafka_server_$1_connections_software
          type: GAUGE
          labels:
            clientSoftwareName: "$2"
            clientSoftwareVersion: "$3"
            listener: "$4"
            networkProcessor: "$5"
        - pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+):"
          name: kafka_server_$1_$4
          type: GAUGE
          labels:
            listener: "$2"
            networkProcessor: "$3"
        - pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+)
          name: kafka_server_$1_$4
          type: GAUGE
          labels:
            listener: "$2"
            networkProcessor: "$3"
        # Some percent metrics use MeanRate attribute
        # Ex) kafka.server<type=(KafkaRequestHandlerPool), name=(RequestHandlerAvgIdlePercent)><>MeanRate
        - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>MeanRate
          name: kafka_$1_$2_$3_percent
          type: GAUGE
        # Generic gauges for percents
        - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>Value
          name: kafka_$1_$2_$3_percent
          type: GAUGE
        - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*, (.+)=(.+)><>Value
          name: kafka_$1_$2_$3_percent
          type: GAUGE
          labels:
            "$4": "$5"
        # Generic per-second counters with 0-2 key/value pairs
        - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+), (.+)=(.+)><>Count
          name: kafka_$1_$2_$3_total
          type: COUNTER
          labels:
            "$4": "$5"
            "$6": "$7"
        - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+)><>Count
          name: kafka_$1_$2_$3_total
          type: COUNTER
          labels:
            "$4": "$5"
        - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*><>Count
          name: kafka_$1_$2_$3_total
          type: COUNTER
        # Generic gauges with 0-2 key/value pairs
        - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Value
          name: kafka_$1_$2_$3
          type: GAUGE
          labels:
            "$4": "$5"
            "$6": "$7"
        - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Value
          name: kafka_$1_$2_$3
          type: GAUGE
          labels:
            "$4": "$5"
        - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Value
          name: kafka_$1_$2_$3
          type: GAUGE
        # Emulate Prometheus 'Summary' metrics for the exported 'Histogram's.
        # Note that these are missing the '_sum' metric!
        - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Count
          name: kafka_$1_$2_$3_count
          type: COUNTER
          labels:
            "$4": "$5"
            "$6": "$7"
        - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\d+)thPercentile
          name: kafka_$1_$2_$3
          type: GAUGE
          labels:
            "$4": "$5"
            "$6": "$7"
            quantile: "0.$8"
        - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Count
          name: kafka_$1_$2_$3_count
          type: COUNTER
          labels:
            "$4": "$5"
        - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\d+)thPercentile
          name: kafka_$1_$2_$3
          type: GAUGE
          labels:
            "$4": "$5"
            quantile: "0.$6"
        - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Count
          name: kafka_$1_$2_$3_count
          type: COUNTER
        - pattern: kafka.(\w+)<type=(.+), name=(.+)><>(\d+)thPercentile
          name: kafka_$1_$2_$3
          type: GAUGE
          labels:
            quantile: "0.$4"
  dependencies:
    zookeeper:
      endpoint: zookeeper.kafka.svc.cluster.local:2182
      tls:
        enabled: true
---
apiVersion: platform.confluent.io/v1beta1
kind: Zookeeper
metadata:
  name: zookeeper
spec:
  replicas: 3
  tls:
    autoGeneratedCerts: true
  image:
    application: confluentinc/cp-zookeeper:7.4.0
    init: confluentinc/confluent-init-container:2.6.0
  dataVolumeCapacity: 90Gi
  logVolumeCapacity: 10Gi
  storageClass:
    name: premium-rwo
  podTemplate:
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 1
            preference:
              matchExpressions:
                - key: "app.stateful/component"
                  operator: In
                  values:
                    - "zookeeper"
    topologySpreadConstraints:
      - maxSkew: 1
        topologyKey: "topology.kubernetes.io/zone"
        whenUnsatisfiable: DoNotSchedule
        labelSelector:
          matchLabels:
            app: zookeeper
            clusterId: kafka
            platform.confluent.io/type: zookeeper
    resources:
      requests:
        memory: 3Gi
        cpu: "1"
      limits:
        memory: 3Gi
        cpu: "2"