Skip to content
This repository has been archived by the owner on Mar 29, 2024. It is now read-only.

Commit

Permalink
Exposed zkclient metrics for prometheus (#184)
Browse files Browse the repository at this point in the history
  • Loading branch information
hiteshk25 authored Sep 2, 2022
1 parent 11c6392 commit a53936e
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ public final class PrometheusMetricsServlet extends BaseSolrServlet {
new OsMetricsApiCaller(),
new ThreadMetricsApiCaller(),
new StatusCodeMetricsApiCaller(),
new CoresMetricsApiCaller()
new CoresMetricsApiCaller(),
new NodeMetricsApiCaller()
));

private final Map<String, PrometheusMetricType> cacheMetricTypes = ImmutableMap.of(
Expand Down Expand Up @@ -320,6 +321,80 @@ protected void handle(List<PrometheusMetric> results, JsonNode metrics) throws I
}
}

// Reports the zkClient metrics, which are published at the solr node level
// (under "solr.node" / "CONTAINER.zkClient" in the metrics payload).
static class NodeMetricsApiCaller extends MetricsApiCaller {

  NodeMetricsApiCaller() {
    super("node", "CONTAINER.zkClient", "");
  }

  /*
  Shape of the payload consumed by handle():

  "metrics": {
    "solr.node": {
      "CONTAINER.zkClient": {
        "watchesFired": 93814,
        "reads": 48722,
        "writes": 9315,
        "bytesRead": 2465678796,
        "bytesWritten": 9331,
        "multiOps": 8927,
        "cumulativeMultiOps": 22392,
        "childFetches": 109212,
        "cumulativeChildrenFetched": 159345342,
        "existsChecks": 162569,
        "deletes": 0,
        "dataWatches": 5560,
        "childrenWatches": 196
      }
    }
  }
  */
  @Override
  protected void handle(List<PrometheusMetric> results, JsonNode metrics) throws IOException {
    final JsonNode zk = metrics.path("solr.node").path("CONTAINER.zkClient");

    // Every value is read before any metric is appended to results.
    final long watchesFired = getNumber(zk, "watchesFired").longValue();
    final long reads = getNumber(zk, "reads").longValue();
    final long writes = getNumber(zk, "writes").longValue();
    final long bytesRead = getNumber(zk, "bytesRead").longValue();
    final long bytesWritten = getNumber(zk, "bytesWritten").longValue();
    final long multiOps = getNumber(zk, "multiOps").longValue();
    final long cumulativeMultiOps = getNumber(zk, "cumulativeMultiOps").longValue();
    final long childFetches = getNumber(zk, "childFetches").longValue();
    final long cumulativeChildrenFetched = getNumber(zk, "cumulativeChildrenFetched").longValue();
    final long existsChecks = getNumber(zk, "existsChecks").longValue();
    final long deletes = getNumber(zk, "deletes").longValue();
    final long dataWatches = getNumber(zk, "dataWatches").longValue();
    final long childrenWatches = getNumber(zk, "childrenWatches").longValue();

    // The first eleven values are exported as counters.
    results.add(new PrometheusMetric(
        "zkClient_watchesFired", PrometheusMetricType.COUNTER,
        "number of zk-watches fired on the solr node", watchesFired));
    results.add(new PrometheusMetric(
        "zkClient_reads", PrometheusMetricType.COUNTER,
        "number of docs zk-reads from the solr node", reads));
    results.add(new PrometheusMetric(
        "zkClient_writes", PrometheusMetricType.COUNTER,
        "number of zk-writes from the solr node", writes));
    results.add(new PrometheusMetric(
        "zkClient_bytesRead", PrometheusMetricType.COUNTER,
        "zk-client reads total bytes from the solr node", bytesRead));
    results.add(new PrometheusMetric(
        "zkClient_bytesWritten", PrometheusMetricType.COUNTER,
        "zk-client writes total bytes from the solr node", bytesWritten));
    results.add(new PrometheusMetric(
        "zkClient_multiOps", PrometheusMetricType.COUNTER,
        "zk-client multi ops from the solr node", multiOps));
    results.add(new PrometheusMetric(
        "zkClient_cumulativeMultiOps", PrometheusMetricType.COUNTER,
        "total multi zk-ops succeed from the solr node", cumulativeMultiOps));
    results.add(new PrometheusMetric(
        "zkClient_childFetches", PrometheusMetricType.COUNTER,
        "zk-clients fetches children from the solr node", childFetches));
    results.add(new PrometheusMetric(
        "zkClient_cumulativeChildrenFetched", PrometheusMetricType.COUNTER,
        "cumulative number of children fetches from the solr node", cumulativeChildrenFetched));
    results.add(new PrometheusMetric(
        "zkClient_existsChecks", PrometheusMetricType.COUNTER,
        "number of zk-exists checks from the solr node", existsChecks));
    results.add(new PrometheusMetric(
        "zkClient_deletes", PrometheusMetricType.COUNTER,
        "number of zk-deletes operations from the solr node", deletes));

    // The two watch counts are exported as gauges.
    results.add(new PrometheusMetric(
        "zkClient_dataWatches", PrometheusMetricType.GAUGE,
        "number of data zk-watches from the solr node", dataWatches));
    results.add(new PrometheusMetric(
        "zkClient_childrenWatches", PrometheusMetricType.GAUGE,
        "number of child zk-watches from the solr node", childrenWatches));
  }
}

// Aggregating across all the cores on the node.
// Report only local requests, excluding forwarded requests to other nodes.
static class CoresMetricsApiCaller extends MetricsApiCaller {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -459,4 +459,73 @@ public void testCoresMetricsApiCallerMissingIndex() throws Exception {
"deletes_by_query 66\n";
assertMetricsApiCaller(new PrometheusMetricsServlet.CoresMetricsApiCaller(), json, 25, output);
}

// Feeds a sample solr.node / CONTAINER.zkClient metrics response to NodeMetricsApiCaller
// and checks the resulting Prometheus text output.
@Test
public void testNodeMetricsApiCaller() throws Exception {
  // Input lines are joined with '\n'; there is no newline after the closing brace.
  final String json = String.join("\n",
      "{",
      " \"responseHeader\": {",
      " \"status\": 0,",
      " \"QTime\": 2",
      " },",
      " \"metrics\": {",
      " \"solr.node\": {",
      " \"CONTAINER.zkClient\": {",
      " \"watchesFired\": 93814,",
      " \"reads\": 48722,",
      " \"writes\": 9315,",
      " \"bytesRead\": 2465678796,",
      " \"bytesWritten\": 9331,",
      " \"multiOps\": 8927,",
      " \"cumulativeMultiOps\": 22392,",
      " \"childFetches\": 109212,",
      " \"cumulativeChildrenFetched\": 159345342,",
      " \"existsChecks\": 162575,",
      " \"deletes\": 0,",
      " \"dataWatches\": 5572,",
      " \"childrenWatches\": 196",
      " }",
      " }",
      " }",
      "}");

  // Expected output: a HELP line, a TYPE line and a value line per metric.
  // Every line, including the last one, ends with '\n'.
  final String output = String.join("\n",
      "# HELP zk_client_watches_fired number of zk-watches fired on the solr node",
      "# TYPE zk_client_watches_fired counter",
      "zk_client_watches_fired 93814",
      "# HELP zk_client_reads number of docs zk-reads from the solr node",
      "# TYPE zk_client_reads counter",
      "zk_client_reads 48722",
      "# HELP zk_client_writes number of zk-writes from the solr node",
      "# TYPE zk_client_writes counter",
      "zk_client_writes 9315",
      "# HELP zk_client_bytes_read zk-client reads total bytes from the solr node",
      "# TYPE zk_client_bytes_read counter",
      "zk_client_bytes_read 2465678796",
      "# HELP zk_client_bytes_written zk-client writes total bytes from the solr node",
      "# TYPE zk_client_bytes_written counter",
      "zk_client_bytes_written 9331",
      "# HELP zk_client_multi_ops zk-client multi ops from the solr node",
      "# TYPE zk_client_multi_ops counter",
      "zk_client_multi_ops 8927",
      "# HELP zk_client_cumulative_multi_ops total multi zk-ops succeed from the solr node",
      "# TYPE zk_client_cumulative_multi_ops counter",
      "zk_client_cumulative_multi_ops 22392",
      "# HELP zk_client_child_fetches zk-clients fetches children from the solr node",
      "# TYPE zk_client_child_fetches counter",
      "zk_client_child_fetches 109212",
      "# HELP zk_client_cumulative_children_fetched cumulative number of children fetches from the solr node",
      "# TYPE zk_client_cumulative_children_fetched counter",
      "zk_client_cumulative_children_fetched 159345342",
      "# HELP zk_client_exists_checks number of zk-exists checks from the solr node",
      "# TYPE zk_client_exists_checks counter",
      "zk_client_exists_checks 162575",
      "# HELP zk_client_deletes number of zk-deletes operations from the solr node",
      "# TYPE zk_client_deletes counter",
      "zk_client_deletes 0",
      "# HELP zk_client_data_watches number of data zk-watches from the solr node",
      "# TYPE zk_client_data_watches gauge",
      "zk_client_data_watches 5572",
      "# HELP zk_client_children_watches number of child zk-watches from the solr node",
      "# TYPE zk_client_children_watches gauge",
      "zk_client_children_watches 196") + "\n";

  final PrometheusMetricsServlet.NodeMetricsApiCaller caller =
      new PrometheusMetricsServlet.NodeMetricsApiCaller();
  assertMetricsApiCaller(caller, json, 2, output);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ public String create(final String path, final byte[] data,
}
metrics.writes.increment();
if (data != null) {
metrics.bytesWritten.increment();
metrics.bytesWritten.add(data.length);
}
return result;

Expand Down

0 comments on commit a53936e

Please sign in to comment.