[Metricbeat] gcp: add dataproc metricset #30008

Merged (2 commits) on Feb 10, 2022
1 change: 1 addition & 0 deletions CHANGELOG-developer.next.asciidoc
@@ -132,6 +132,7 @@ The list below covers the major changes between 7.0.0-rc2 and main only.
- Add support for `credentials_json` in `gcp` module, all metricsets {pull}29584[29584]
- Add gcp firestore metricset. {pull}29918[29918]
- Added TESTING_FILEBEAT_FILEPATTERN option for filebeat module pytests {pull}30103[30103]
- Add gcp dataproc metricset. {pull}30008[30008]

==== Deprecated

204 changes: 204 additions & 0 deletions metricbeat/docs/fields.asciidoc
@@ -34857,6 +34857,210 @@ type: long

--

[float]
=== dataproc

Google Cloud Dataproc metrics


*`gcp.dataproc.batch.spark.executors.count`*::
+
--
Indicates the number of Batch Spark executors.

type: long

--

*`gcp.dataproc.cluster.hdfs.datanodes.count`*::
+
--
Indicates the number of HDFS DataNodes that are running inside a cluster.

type: long

--

*`gcp.dataproc.cluster.hdfs.storage_capacity.value`*::
+
--
Indicates the capacity of the HDFS system running on the cluster, in GB.

type: double

--

*`gcp.dataproc.cluster.hdfs.storage_utilization.value`*::
+
--
The percentage of HDFS storage currently used.

type: double

--

*`gcp.dataproc.cluster.hdfs.unhealthy_blocks.count`*::
+
--
Indicates the number of unhealthy blocks inside the cluster.

type: long

--

*`gcp.dataproc.cluster.job.completion_time.value`*::
+
--
The time jobs took to complete from the time the user submits a job to the time Dataproc reports it is completed.

type: long

--

*`gcp.dataproc.cluster.job.duration.value`*::
+
--
The time jobs have spent in a given state.

type: long

--

*`gcp.dataproc.cluster.job.failed.count`*::
+
--
Indicates the number of jobs that have failed on a cluster.

type: long

--

*`gcp.dataproc.cluster.job.running.count`*::
+
--
Indicates the number of jobs that are running on a cluster.

type: long

--

*`gcp.dataproc.cluster.job.submitted.count`*::
+
--
Indicates the number of jobs that have been submitted to a cluster.

type: long

--

*`gcp.dataproc.cluster.operation.completion_time.value`*::
+
--
The time operations took to complete from the time the user submits an operation to the time Dataproc reports it is completed.

type: long

--

*`gcp.dataproc.cluster.operation.duration.value`*::
+
--
The time operations have spent in a given state.

type: long

--

*`gcp.dataproc.cluster.operation.failed.count`*::
+
--
Indicates the number of operations that have failed on a cluster.

type: long

--

*`gcp.dataproc.cluster.operation.running.count`*::
+
--
Indicates the number of operations that are running on a cluster.

type: long

--

*`gcp.dataproc.cluster.operation.submitted.count`*::
+
--
Indicates the number of operations that have been submitted to a cluster.

type: long

--

*`gcp.dataproc.cluster.yarn.allocated_memory_percentage.value`*::
+
--
The percentage of YARN memory that is allocated.

type: double

--

*`gcp.dataproc.cluster.yarn.apps.count`*::
+
--
Indicates the number of active YARN applications.

type: long

--

*`gcp.dataproc.cluster.yarn.containers.count`*::
+
--
Indicates the number of YARN containers.

type: long

--

*`gcp.dataproc.cluster.yarn.memory_size.value`*::
+
--
Indicates the YARN memory size in GB.

type: double

--

*`gcp.dataproc.cluster.yarn.nodemanagers.count`*::
+
--
Indicates the number of YARN NodeManagers running inside the cluster.

type: long

--

*`gcp.dataproc.cluster.yarn.pending_memory_size.value`*::
+
--
The current memory request, in GB, that is pending to be fulfilled by the scheduler.

type: double

--

*`gcp.dataproc.cluster.yarn.virtual_cores.count`*::
+
--
Indicates the number of virtual cores in YARN.

type: long

--

[float]
=== firestore

5 changes: 5 additions & 0 deletions metricbeat/docs/modules/gcp.asciidoc
@@ -276,6 +276,7 @@ metricbeat.modules:
- pubsub
- loadbalancing
- firestore
- dataproc
zone: "us-central1-a"
project_id: "your project id"
credentials_file_path: "your JSON credentials file path"
@@ -340,6 +341,8 @@ The following metricsets are available:

* <<metricbeat-metricset-gcp-compute,compute>>

* <<metricbeat-metricset-gcp-dataproc,dataproc>>

* <<metricbeat-metricset-gcp-firestore,firestore>>

* <<metricbeat-metricset-gcp-gke,gke>>
@@ -356,6 +359,8 @@ include::gcp/billing.asciidoc[]

include::gcp/compute.asciidoc[]

include::gcp/dataproc.asciidoc[]

include::gcp/firestore.asciidoc[]

include::gcp/gke.asciidoc[]
24 changes: 24 additions & 0 deletions metricbeat/docs/modules/gcp/dataproc.asciidoc
@@ -0,0 +1,24 @@
////
This file is generated! See scripts/mage/docs_collector.go
////

[[metricbeat-metricset-gcp-dataproc]]
[role="xpack"]
=== Google Cloud Platform dataproc metricset

beta[]

include::../../../../x-pack/metricbeat/module/gcp/dataproc/_meta/docs.asciidoc[]


==== Fields

For a description of each field in the metricset, see the
<<exported-fields-gcp,exported fields>> section.

Here is an example document generated by this metricset:

[source,json]
----
include::../../../../x-pack/metricbeat/module/gcp/dataproc/_meta/data.json[]
----
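For reference (a sketch, not part of the diff), a minimal standalone configuration that enables only the new metricset, reusing the options from the module config elsewhere in this PR; the zone, project_id, and credentials values are placeholders:

```yaml
metricbeat.modules:
  - module: gcp
    metricsets:
      - dataproc
    zone: "us-central1-a"
    project_id: "your project id"
    credentials_file_path: "your JSON credentials file path"
    # Cloud Monitoring metrics are minute-granular, so a 1m period is typical.
    period: 1m
```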
3 changes: 2 additions & 1 deletion metricbeat/docs/modules_list.asciidoc
@@ -120,8 +120,9 @@ This file is generated! See scripts/mage/docs_collector.go
|<<metricbeat-metricset-etcd-self,self>>
|<<metricbeat-metricset-etcd-store,store>>
|<<metricbeat-module-gcp,Google Cloud Platform>> beta[] |image:./images/icon-yes.png[Prebuilt dashboards are available] |
.8+| .8+| |<<metricbeat-metricset-gcp-billing,billing>> beta[]
.9+| .9+| |<<metricbeat-metricset-gcp-billing,billing>> beta[]
Member commented:
Not sure if this change is related to this PR. Maybe it is possible to remove it if not?

Contributor (author) replied:
That should be because a new metricset (dataproc) was added and the number of lines increased by 1. The file is updated automatically by running make docs.

|<<metricbeat-metricset-gcp-compute,compute>> beta[]
|<<metricbeat-metricset-gcp-dataproc,dataproc>> beta[]
|<<metricbeat-metricset-gcp-firestore,firestore>> beta[]
|<<metricbeat-metricset-gcp-gke,gke>> beta[]
|<<metricbeat-metricset-gcp-loadbalancing,loadbalancing>> beta[]
1 change: 1 addition & 0 deletions x-pack/metricbeat/metricbeat.reference.yml
@@ -554,6 +554,7 @@ metricbeat.modules:
- pubsub
- loadbalancing
- firestore
- dataproc
zone: "us-central1-a"
project_id: "your project id"
credentials_file_path: "your JSON credentials file path"
1 change: 1 addition & 0 deletions x-pack/metricbeat/module/gcp/_meta/config.yml
@@ -12,6 +12,7 @@
- pubsub
- loadbalancing
- firestore
- dataproc
zone: "us-central1-a"
project_id: "your project id"
credentials_file_path: "your JSON credentials file path"
1 change: 1 addition & 0 deletions x-pack/metricbeat/module/gcp/constants.go
@@ -23,6 +23,7 @@ const (
ServicePubsub = "pubsub"
ServiceStorage = "storage"
ServiceFirestore = "firestore"
ServiceDataproc = "dataproc"
)

// Paths within the GCP monitoring.TimeSeries response, if converted to JSON, where you can find each ECS field required for the output event
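The new constant follows the module's one-`Service*`-constant-per-metricset convention. As a hedged sketch of how such a constant can be used (the `metricPrefix` helper and the `<service>.googleapis.com/` prefix pattern are illustrative assumptions, not code from this PR):

```go
package main

import "fmt"

// Sketch only: mirrors the style of the Service* constants in
// x-pack/metricbeat/module/gcp/constants.go.
const (
	ServiceFirestore = "firestore"
	ServiceDataproc  = "dataproc" // added by this PR
)

// metricPrefix builds a Cloud Monitoring metric-type prefix for a service,
// assuming the common "<service>.googleapis.com/" naming pattern.
func metricPrefix(service string) string {
	return fmt.Sprintf("%s.googleapis.com/", service)
}

func main() {
	fmt.Println(metricPrefix(ServiceDataproc)) // dataproc.googleapis.com/
}
```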
41 changes: 41 additions & 0 deletions x-pack/metricbeat/module/gcp/dataproc/_meta/data.json
@@ -0,0 +1,41 @@
{
"@timestamp": "2016-05-23T08:05:34.853Z",
"cloud": {
"account": {
"id": "elastic-apm"
},
"provider": "gcp"
},
"event": {
"dataset": "gcp.dataproc",
"duration": 115000,
"module": "gcp"
},
"gcp": {
"labels": {
"metrics": {
"storage_class": "MULTI_REGIONAL"
},
"resource": {
"bucket_name": "artifacts.elastic-apm.appspot.com",
"location": "us"
}
},
"dataproc": {
"cluster": {
"hdfs": {
"datanodes": {
"value": 15
}
}
}
}
},
"metricset": {
"name": "dataproc",
"period": 10000
},
"service": {
"type": "gcp"
}
}
41 changes: 41 additions & 0 deletions x-pack/metricbeat/module/gcp/dataproc/_meta/data_cluster.json
@@ -0,0 +1,41 @@
{
"@timestamp": "2016-05-23T08:05:34.853Z",
"cloud": {
"account": {
"id": "elastic-apm"
},
"provider": "gcp"
},
"event": {
"dataset": "gcp.dataproc",
"duration": 115000,
"module": "gcp"
},
"gcp": {
"labels": {
"metrics": {
"storage_class": "MULTI_REGIONAL"
},
"resource": {
"bucket_name": "artifacts.elastic-apm.appspot.com",
"location": "us"
}
},
"dataproc": {
"cluster": {
"hdfs": {
"datanodes": {
"value": 15
}
}
}
}
},
"metricset": {
"name": "dataproc",
"period": 10000
},
"service": {
"type": "gcp"
}
}