From 7ed4dc8d1c8b6a6681b487a8e2ea9a30284393b4 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 18 Dec 2024 16:45:23 +0100 Subject: [PATCH 1/2] fix(hadoop): Fix the JMX exporter configuration --- CHANGELOG.md | 6 ++++ hadoop/stackable/jmx/datanode.yaml | 51 +++++++++++++++++++++++++-- hadoop/stackable/jmx/journalnode.yaml | 15 ++++++-- hadoop/stackable/jmx/namenode.yaml | 46 ++++++++++++++++++++++-- 4 files changed, 111 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a60565d7..16e4abf7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,11 @@ All notable changes to this project will be documented in this file. - kafka: Remove `kubectl`, as we are now using listener-op ([#884]). +### Fixed + +- hadoop: Fix the JMX exporter configuration for metrics suffixed with + `_total`, `_info` and `_created` ([#962]). + [#884]: https://github.com/stackabletech/docker-images/pull/884 [#928]: https://github.com/stackabletech/docker-images/pull/928 [#943]: https://github.com/stackabletech/docker-images/pull/943 @@ -31,6 +36,7 @@ All notable changes to this project will be documented in this file. [#955]: https://github.com/stackabletech/docker-images/pull/955 [#958]: https://github.com/stackabletech/docker-images/pull/958 [#959]: https://github.com/stackabletech/docker-images/pull/959 +[#962]: https://github.com/stackabletech/docker-images/pull/962 ## [24.11.0] - 2024-11-18 diff --git a/hadoop/stackable/jmx/datanode.yaml b/hadoop/stackable/jmx/datanode.yaml index 8b375d655..690105997 100644 --- a/hadoop/stackable/jmx/datanode.yaml +++ b/hadoop/stackable/jmx/datanode.yaml @@ -22,6 +22,18 @@ rules: kind: 'MetricsSystem' sub: $2 type: GAUGE + # FSDatasetState with _total suffix (also extracts the FSDataset ID), + # e.g. Hadoop:name=FSDatasetState,attribute=EstimatedCapacityLostTotal + - pattern: 'Hadoop<>(.*_total): (\d+)' + attrNameSnakeCase: true + name: hadoop_$1_$3 + value: $4 + labels: + service: HDFS + role: $1 + fsdatasetid: $2 + kind: 'FSDatasetState' + type: COUNTER # FSDatasetState (also extracts the FSDataset ID) - pattern: 'Hadoop<>(.*): (\d+)' attrNameSnakeCase: true @@ -33,7 +45,19 @@ rules: fsdatasetid: $2 kind: 'FSDatasetState' type: GAUGE - # DataNodeActivity (also extracts hostname and port) + # DataNodeActivity with _info suffix (also extracts hostname and port), + # e.g. Hadoop:name=DataNodeActivity-hdfs-datanode-default-0-9866,attribute=BlocksGetLocalPathInfo + - pattern: 'Hadoop<>(.*_info): (\d+)' + attrNameSnakeCase: true + name: hadoop_$1_$4_ + value: $5 + labels: + service: HDFS + role: $1 + host: $2 + port: $3 + kind: 'DataNodeActivity' + type: GAUGE - pattern: 'Hadoop<>(.*): (\d+)' attrNameSnakeCase: true name: hadoop_$1_$4 @@ -45,8 +69,29 @@ rules: port: $3 kind: 'DataNodeActivity' type: GAUGE - # All other services - - pattern: 'Hadoop<>(.*): (\d+)' + # Generic counter, e.g. Hadoop:name=FSDatasetState,attribute=EstimatedCapacityLostTotal + - pattern: 'Hadoop<>(.*_total): (\d+)' + attrNameSnakeCase: true + name: hadoop_$1_$3 + value: $4 + labels: + service: HDFS + role: $1 + kind: $2 + type: COUNTER + # Metrics suffixed with _info, e.g. Hadoop:name=JvmMetrics,attribute=LogInfo + # The suffix _info is reserved for static information, therefore an underscore is appended. + - pattern: 'Hadoop<>(.*_info): (.*)' + attrNameSnakeCase: true + name: hadoop_$1_$3_ + value: $4 + labels: + service: HDFS + role: $1 + kind: $2 + type: GAUGE + # All other Hadoop metrics + - pattern: 'Hadoop<>(.*): (.*)' attrNameSnakeCase: true name: hadoop_$1_$3 value: $4 diff --git a/hadoop/stackable/jmx/journalnode.yaml b/hadoop/stackable/jmx/journalnode.yaml index 99bf911bd..8cfcada96 100644 --- a/hadoop/stackable/jmx/journalnode.yaml +++ b/hadoop/stackable/jmx/journalnode.yaml @@ -23,8 +23,19 @@ rules: kind: 'MetricsSystem' sub: $2 type: GAUGE - # All JournalNode infos - - pattern: 'Hadoop<>(.*): (\d+)' + # Metrics suffixed with _info, e.g. Hadoop:name=JvmMetrics,attribute=LogInfo + # The suffix _info is reserved for static information, therefore an underscore is appended. + - pattern: 'Hadoop<>(.*_info): (.*)' + attrNameSnakeCase: true + name: hadoop_$1_$3_ + value: $4 + labels: + service: HDFS + role: $1 + kind: $2 + type: GAUGE + # All other Hadoop metrics + - pattern: 'Hadoop<>(.*): (.*)' attrNameSnakeCase: true name: hadoop_$1_$3 value: $4 diff --git a/hadoop/stackable/jmx/namenode.yaml b/hadoop/stackable/jmx/namenode.yaml index a2f1ee7c9..b2ddbd6d5 100644 --- a/hadoop/stackable/jmx/namenode.yaml +++ b/hadoop/stackable/jmx/namenode.yaml @@ -23,8 +23,50 @@ rules: kind: 'MetricsSystem' sub: $2 type: GAUGE - # All NameNode infos - - pattern: 'Hadoop<>(.*): (\d+)' + # Total raw capacity in bytes, e.g. Hadoop:name=NameNodeInfo,attribute=Total + - pattern: 'Hadoop<>(total): (\d+)' + attrNameSnakeCase: true + name: hadoop_$1_$3 + value: $4 + labels: + service: HDFS + role: $1 + kind: $2 + type: COUNTER + # Generic counter, e.g. Hadoop:name=FSNamesystem,attribute=FilesTotal + - pattern: 'Hadoop<>(.*_total): (\d+)' + attrNameSnakeCase: true + name: hadoop_$1_$3 + value: $4 + labels: + service: HDFS + role: $1 + kind: $2 + type: COUNTER + # Metrics suffixed with _created, e.g. Hadoop:name=NameNodeActivity,attribute=FilesCreated + # The suffix _created is reserved for timestamps, therefore an underscore is appended. + - pattern: 'Hadoop<>(.*_created): (.*)' + attrNameSnakeCase: true + name: hadoop_$1_$3_ + value: $4 + labels: + service: HDFS + role: $1 + kind: $2 + type: GAUGE + # Metrics suffixed with _info, e.g. Hadoop:name=JvmMetrics,attribute=LogInfo + # The suffix _info is reserved for static information, therefore an underscore is appended. + - pattern: 'Hadoop<>(.*_info): (.*)' + attrNameSnakeCase: true + name: hadoop_$1_$3_ + value: $4 + labels: + service: HDFS + role: $1 + kind: $2 + type: GAUGE + # All other Hadoop metrics + - pattern: 'Hadoop<>(.*): (.*)' attrNameSnakeCase: true name: hadoop_$1_$3 value: $4 From d6ea25b66475861092d60f9c9855abbfb9cc1f4f Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Fri, 20 Dec 2024 10:49:17 +0100 Subject: [PATCH 2/2] chore(hadoop): Move copy step of the JMX configuration files from the builder image to the final image --- hadoop/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop/Dockerfile b/hadoop/Dockerfile index 0b681863f..acb4e0947 100644 --- a/hadoop/Dockerfile +++ b/hadoop/Dockerfile @@ -11,10 +11,7 @@ ARG TARGETARCH ARG TARGETOS ARG STACKABLE_USER_UID -WORKDIR /stackable - -COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx -COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /stackable/fuse_dfs_wrapper +WORKDIR /stackable/jmx # The symlink from JMX Exporter 0.16.1 to the versionless link exists because old HDFS Operators (up until and including 23.7) used to hardcode # the version of JMX Exporter like this: "-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar" @@ -27,6 +24,8 @@ RUN curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prome ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar && \ ln -s /stackable/jmx/jmx_prometheus_javaagent.jar /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar +WORKDIR /stackable + RUN ARCH="${TARGETARCH/amd64/x64}" && \ curl "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC . && \ ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler @@ -141,6 +140,7 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/jmx /stack COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/async-profiler /stackable/async-profiler/ COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /stackable/ +COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx # fuse is required for fusermount (called by fuse_dfs)