From 006eab239470ba85f2ebb06173d2682519afb95d Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 21 Dec 2017 13:22:48 -0800 Subject: [PATCH 1/4] Added telemetry around Catalog APIs --- agent/catalog_endpoint.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/agent/catalog_endpoint.go b/agent/catalog_endpoint.go index 1eb30fc3a66b..7fec1715133c 100644 --- a/agent/catalog_endpoint.go +++ b/agent/catalog_endpoint.go @@ -5,12 +5,14 @@ import ( "net/http" "strings" + metrics "github.com/armon/go-metrics" "github.com/hashicorp/consul/agent/structs" ) var durations = NewDurationFixer("interval", "timeout", "deregistercriticalserviceafter") func (s *HTTPServer) CatalogRegister(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + metrics.IncrCounter([]string{"client", "api", "catalog_register"}, 1) if req.Method != "PUT" { return nil, MethodNotAllowedError{req.Method, []string{"PUT"}} } @@ -31,12 +33,15 @@ func (s *HTTPServer) CatalogRegister(resp http.ResponseWriter, req *http.Request // Forward to the servers var out struct{} if err := s.agent.RPC("Catalog.Register", &args, &out); err != nil { + metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_register"}, 1) return nil, err } + metrics.IncrCounter([]string{"client", "api", "success", "catalog_register"}, 1) return true, nil } func (s *HTTPServer) CatalogDeregister(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + metrics.IncrCounter([]string{"client", "api", "catalog_deregister"}, 1) if req.Method != "PUT" { return nil, MethodNotAllowedError{req.Method, []string{"PUT"}} } @@ -57,24 +62,30 @@ func (s *HTTPServer) CatalogDeregister(resp http.ResponseWriter, req *http.Reque // Forward to the servers var out struct{} if err := s.agent.RPC("Catalog.Deregister", &args, &out); err != nil { + metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_deregister"}, 1) return nil, err } + metrics.IncrCounter([]string{"client", "api", 
"success", "catalog_deregister"}, 1) return true, nil } func (s *HTTPServer) CatalogDatacenters(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + metrics.IncrCounter([]string{"client", "api", "catalog_datacenters"}, 1) if req.Method != "GET" { return nil, MethodNotAllowedError{req.Method, []string{"GET"}} } var out []string if err := s.agent.RPC("Catalog.ListDatacenters", struct{}{}, &out); err != nil { + metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_datacenters"}, 1) return nil, err } + metrics.IncrCounter([]string{"client", "api", "success", "catalog_datacenters"}, 1) return out, nil } func (s *HTTPServer) CatalogNodes(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + metrics.IncrCounter([]string{"client", "api", "catalog_nodes"}, 1) if req.Method != "GET" { return nil, MethodNotAllowedError{req.Method, []string{"GET"}} } @@ -84,6 +95,7 @@ func (s *HTTPServer) CatalogNodes(resp http.ResponseWriter, req *http.Request) ( s.parseSource(req, &args.Source) args.NodeMetaFilters = s.parseMetaFilter(req) if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done { + metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_nodes"}, 1) return nil, nil } @@ -98,10 +110,12 @@ func (s *HTTPServer) CatalogNodes(resp http.ResponseWriter, req *http.Request) ( if out.Nodes == nil { out.Nodes = make(structs.Nodes, 0) } + metrics.IncrCounter([]string{"client", "api", "success", "catalog_nodes"}, 1) return out.Nodes, nil } func (s *HTTPServer) CatalogServices(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + metrics.IncrCounter([]string{"client", "api", "catalog_services"}, 1) if req.Method != "GET" { return nil, MethodNotAllowedError{req.Method, []string{"GET"}} } @@ -116,6 +130,7 @@ func (s *HTTPServer) CatalogServices(resp http.ResponseWriter, req *http.Request var out structs.IndexedServices defer setMeta(resp, &out.QueryMeta) if err := s.agent.RPC("Catalog.ListServices", &args, 
&out); err != nil { + metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_services"}, 1) return nil, err } @@ -123,10 +138,12 @@ func (s *HTTPServer) CatalogServices(resp http.ResponseWriter, req *http.Request if out.Services == nil { out.Services = make(structs.Services, 0) } + metrics.IncrCounter([]string{"client", "api", "success", "catalog_services"}, 1) return out.Services, nil } func (s *HTTPServer) CatalogServiceNodes(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + metrics.IncrCounter([]string{"client", "api", "catalog_service_nodes"}, 1) if req.Method != "GET" { return nil, MethodNotAllowedError{req.Method, []string{"GET"}} } @@ -158,6 +175,7 @@ func (s *HTTPServer) CatalogServiceNodes(resp http.ResponseWriter, req *http.Req var out structs.IndexedServiceNodes defer setMeta(resp, &out.QueryMeta) if err := s.agent.RPC("Catalog.ServiceNodes", &args, &out); err != nil { + metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_service_nodes"}, 1) return nil, err } s.agent.TranslateAddresses(args.Datacenter, out.ServiceNodes) @@ -171,10 +189,12 @@ func (s *HTTPServer) CatalogServiceNodes(resp http.ResponseWriter, req *http.Req s.ServiceTags = make([]string, 0) } } + metrics.IncrCounter([]string{"client", "api", "success", "catalog_service_nodes"}, 1) return out.ServiceNodes, nil } func (s *HTTPServer) CatalogNodeServices(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + metrics.IncrCounter([]string{"client", "api", "catalog_node_services"}, 1) if req.Method != "GET" { return nil, MethodNotAllowedError{req.Method, []string{"GET"}} } @@ -197,6 +217,7 @@ func (s *HTTPServer) CatalogNodeServices(resp http.ResponseWriter, req *http.Req var out structs.IndexedNodeServices defer setMeta(resp, &out.QueryMeta) if err := s.agent.RPC("Catalog.NodeServices", &args, &out); err != nil { + metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_node_services"}, 1) return nil, err } if out.NodeServices != 
nil && out.NodeServices.Node != nil { @@ -211,5 +232,6 @@ func (s *HTTPServer) CatalogNodeServices(resp http.ResponseWriter, req *http.Req } } } + metrics.IncrCounter([]string{"client", "api", "success", "catalog_node_services"}, 1) return out.NodeServices, nil } From 7f28391cbf26772f9cd78fcd757f45104e7188d3 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 21 Dec 2017 16:58:39 -0800 Subject: [PATCH 2/4] Added docs --- website/source/docs/agent/telemetry.html.md | 84 +++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/website/source/docs/agent/telemetry.html.md b/website/source/docs/agent/telemetry.html.md index 6f414634860a..b8209e3e56a7 100644 --- a/website/source/docs/agent/telemetry.html.md +++ b/website/source/docs/agent/telemetry.html.md @@ -74,6 +74,90 @@ These metrics are used to monitor the health of specific Consul agents. rejected requests counter + + `consul.client.api.catalog_register` + This increments whenever a Consul agent receives a catalog register request. + requests + counter + + + `consul.client.api.success.catalog_register` + This increments whenever a Consul agent successfully responds to a catalog register request. + requests + counter + + + `consul.client.api.catalog_deregister` + This increments whenever a Consul agent receives a catalog de-register request. + requests + counter + + + `consul.client.api.success.catalog_deregister` + This increments whenever a Consul agent successfully responds to a catalog de-register request. + requests + counter + + + `consul.client.api.catalog_datacenters` + This increments whenever a Consul agent receives a request to list datacenters in the catalog. + requests + counter + + + `consul.client.api.success.catalog_datacenters` + This increments whenever a Consul agent successfully responds to a request to list datacenters. + requests + counter + + + `consul.client.api.catalog_nodes` + This increments whenever a Consul agent receives a request to list nodes from the catalog. 
+ requests + counter + + + `consul.client.api.success.catalog_nodes` + This increments whenever a Consul agent successfully responds to a request to list nodes. + requests + counter + + + `consul.client.api.catalog_services` + This increments whenever a Consul agent receives a request to list services from the catalog. + requests + counter + + + `consul.client.api.success.catalog_services` + This increments whenever a Consul agent successfully responds to a request to list services. + requests + counter + + + `consul.client.api.catalog_service_nodes` + This increments whenever a Consul agent receives a request to list nodes offering a service. + requests + counter + + + `consul.client.api.success.catalog_service_nodes` + This increments whenever a Consul agent successfully responds to a request to list nodes offering a service. + requests + counter + + + `consul.client.api.catalog_node_services` + This increments whenever a Consul agent receives a request to list services registered in a node. + requests + counter + + + `consul.client.api.success.catalog_node_services` + This increments whenever a Consul agent successfully responds to a request to list services registered in a node. + requests + counter + `consul.runtime.num_goroutines` This tracks the number of running goroutines and is a general load pressure indicator. This may burst from time to time but should return to a steady state value. 
From 294151c1ada8baaf43953367682032e2324dc058 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 21 Dec 2017 20:30:29 -0800 Subject: [PATCH 3/4] Using labels --- agent/catalog_endpoint.go | 63 ++++++++++++++++++++++++++------------- agent/http.go | 5 ++++ 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/agent/catalog_endpoint.go b/agent/catalog_endpoint.go index 7fec1715133c..d1e6bfdd4dbd 100644 --- a/agent/catalog_endpoint.go +++ b/agent/catalog_endpoint.go @@ -12,7 +12,8 @@ import ( var durations = NewDurationFixer("interval", "timeout", "deregistercriticalserviceafter") func (s *HTTPServer) CatalogRegister(resp http.ResponseWriter, req *http.Request) (interface{}, error) { - metrics.IncrCounter([]string{"client", "api", "catalog_register"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_register"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) if req.Method != "PUT" { return nil, MethodNotAllowedError{req.Method, []string{"PUT"}} } @@ -33,15 +34,18 @@ func (s *HTTPServer) CatalogRegister(resp http.ResponseWriter, req *http.Request // Forward to the servers var out struct{} if err := s.agent.RPC("Catalog.Register", &args, &out); err != nil { - metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_register"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "rpc", "error", "catalog_register"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return nil, err } - metrics.IncrCounter([]string{"client", "api", "success", "catalog_register"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "success", "catalog_register"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return true, nil } func (s *HTTPServer) CatalogDeregister(resp http.ResponseWriter, req *http.Request) (interface{}, error) { - metrics.IncrCounter([]string{"client", "api", "catalog_deregister"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_deregister"}, 1, + 
[]metrics.Label{{Name: "node", Value: s.nodeName()}}) if req.Method != "PUT" { return nil, MethodNotAllowedError{req.Method, []string{"PUT"}} } @@ -62,30 +66,36 @@ func (s *HTTPServer) CatalogDeregister(resp http.ResponseWriter, req *http.Reque // Forward to the servers var out struct{} if err := s.agent.RPC("Catalog.Deregister", &args, &out); err != nil { - metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_deregister"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "rpc", "error", "catalog_deregister"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return nil, err } - metrics.IncrCounter([]string{"client", "api", "success", "catalog_deregister"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "success", "catalog_deregister"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return true, nil } func (s *HTTPServer) CatalogDatacenters(resp http.ResponseWriter, req *http.Request) (interface{}, error) { - metrics.IncrCounter([]string{"client", "api", "catalog_datacenters"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_datacenters"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) if req.Method != "GET" { return nil, MethodNotAllowedError{req.Method, []string{"GET"}} } var out []string if err := s.agent.RPC("Catalog.ListDatacenters", struct{}{}, &out); err != nil { - metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_datacenters"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "rpc", "error", "catalog_datacenters"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return nil, err } - metrics.IncrCounter([]string{"client", "api", "success", "catalog_datacenters"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "success", "catalog_datacenters"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return out, nil } func (s *HTTPServer) CatalogNodes(resp http.ResponseWriter, req *http.Request) (interface{}, error) { - 
metrics.IncrCounter([]string{"client", "api", "catalog_nodes"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_nodes"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) if req.Method != "GET" { return nil, MethodNotAllowedError{req.Method, []string{"GET"}} } @@ -95,7 +105,8 @@ func (s *HTTPServer) CatalogNodes(resp http.ResponseWriter, req *http.Request) ( s.parseSource(req, &args.Source) args.NodeMetaFilters = s.parseMetaFilter(req) if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done { - metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_nodes"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "rpc", "error", "catalog_nodes"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return nil, nil } @@ -110,12 +121,14 @@ func (s *HTTPServer) CatalogNodes(resp http.ResponseWriter, req *http.Request) ( if out.Nodes == nil { out.Nodes = make(structs.Nodes, 0) } - metrics.IncrCounter([]string{"client", "api", "success", "catalog_nodes"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "success", "catalog_nodes"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return out.Nodes, nil } func (s *HTTPServer) CatalogServices(resp http.ResponseWriter, req *http.Request) (interface{}, error) { - metrics.IncrCounter([]string{"client", "api", "catalog_services"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_services"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) if req.Method != "GET" { return nil, MethodNotAllowedError{req.Method, []string{"GET"}} } @@ -130,7 +143,8 @@ func (s *HTTPServer) CatalogServices(resp http.ResponseWriter, req *http.Request var out structs.IndexedServices defer setMeta(resp, &out.QueryMeta) if err := s.agent.RPC("Catalog.ListServices", &args, &out); err != nil { - metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_services"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "rpc", "error", 
"catalog_services"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return nil, err } @@ -138,12 +152,14 @@ func (s *HTTPServer) CatalogServices(resp http.ResponseWriter, req *http.Request if out.Services == nil { out.Services = make(structs.Services, 0) } - metrics.IncrCounter([]string{"client", "api", "success", "catalog_services"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "success", "catalog_services"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return out.Services, nil } func (s *HTTPServer) CatalogServiceNodes(resp http.ResponseWriter, req *http.Request) (interface{}, error) { - metrics.IncrCounter([]string{"client", "api", "catalog_service_nodes"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_service_nodes"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) if req.Method != "GET" { return nil, MethodNotAllowedError{req.Method, []string{"GET"}} } @@ -175,7 +191,8 @@ func (s *HTTPServer) CatalogServiceNodes(resp http.ResponseWriter, req *http.Req var out structs.IndexedServiceNodes defer setMeta(resp, &out.QueryMeta) if err := s.agent.RPC("Catalog.ServiceNodes", &args, &out); err != nil { - metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_service_nodes"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "rpc", "error", "catalog_service_nodes"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return nil, err } s.agent.TranslateAddresses(args.Datacenter, out.ServiceNodes) @@ -189,12 +206,14 @@ func (s *HTTPServer) CatalogServiceNodes(resp http.ResponseWriter, req *http.Req s.ServiceTags = make([]string, 0) } } - metrics.IncrCounter([]string{"client", "api", "success", "catalog_service_nodes"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "success", "catalog_service_nodes"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return out.ServiceNodes, nil } func (s *HTTPServer) CatalogNodeServices(resp http.ResponseWriter, req 
*http.Request) (interface{}, error) { - metrics.IncrCounter([]string{"client", "api", "catalog_node_services"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_node_services"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) if req.Method != "GET" { return nil, MethodNotAllowedError{req.Method, []string{"GET"}} } @@ -217,7 +236,8 @@ func (s *HTTPServer) CatalogNodeServices(resp http.ResponseWriter, req *http.Req var out structs.IndexedNodeServices defer setMeta(resp, &out.QueryMeta) if err := s.agent.RPC("Catalog.NodeServices", &args, &out); err != nil { - metrics.IncrCounter([]string{"client", "rpc", "error", "catalog_node_services"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "rpc", "error", "catalog_node_services"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return nil, err } if out.NodeServices != nil && out.NodeServices.Node != nil { @@ -232,6 +252,7 @@ func (s *HTTPServer) CatalogNodeServices(resp http.ResponseWriter, req *http.Req } } } - metrics.IncrCounter([]string{"client", "api", "success", "catalog_node_services"}, 1) + metrics.IncrCounterWithLabels([]string{"client", "api", "success", "catalog_node_services"}, 1, + []metrics.Label{{Name: "node", Value: s.nodeName()}}) return out.NodeServices, nil } diff --git a/agent/http.go b/agent/http.go index 1b869102e9b6..51bee5766b5c 100644 --- a/agent/http.go +++ b/agent/http.go @@ -139,6 +139,11 @@ func (s *HTTPServer) handler(enableDebug bool) http.Handler { } } +// nodeName returns the node name of the agent +func (s *HTTPServer) nodeName() string { + return s.agent.config.NodeName +} + // aclEndpointRE is used to find old ACL endpoints that take tokens in the URL // so that we can redact them. 
The ACL endpoints that take the token in the URL // are all of the form /v1/acl//, and can optionally include query From 3858317ac983829bbd62795d15899936e4ae1f62 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 21 Dec 2017 20:37:13 -0800 Subject: [PATCH 4/4] Updated docs --- website/source/docs/agent/telemetry.html.md | 28 ++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/website/source/docs/agent/telemetry.html.md b/website/source/docs/agent/telemetry.html.md index b8209e3e56a7..c0de0644c4b0 100644 --- a/website/source/docs/agent/telemetry.html.md +++ b/website/source/docs/agent/telemetry.html.md @@ -75,85 +75,85 @@ These metrics are used to monitor the health of specific Consul agents. counter - `consul.client.api.catalog_register` + `consul.client.api.catalog_register.<node>` This increments whenever a Consul agent receives a catalog register request. requests counter - `consul.client.api.success.catalog_register` + `consul.client.api.success.catalog_register.<node>` This increments whenever a Consul agent successfully responds to a catalog register request. requests counter - `consul.client.api.catalog_deregister` + `consul.client.api.catalog_deregister.<node>` This increments whenever a Consul agent receives a catalog de-register request. requests counter - `consul.client.api.success.catalog_deregister` + `consul.client.api.success.catalog_deregister.<node>` This increments whenever a Consul agent successfully responds to a catalog de-register request. requests counter - `consul.client.api.catalog_datacenters` + `consul.client.api.catalog_datacenters.<node>` This increments whenever a Consul agent receives a request to list datacenters in the catalog. requests counter - `consul.client.api.success.catalog_datacenters` + `consul.client.api.success.catalog_datacenters.<node>` This increments whenever a Consul agent successfully responds to a request to list datacenters. 
requests counter - `consul.client.api.catalog_nodes` + `consul.client.api.catalog_nodes.<node>` This increments whenever a Consul agent receives a request to list nodes from the catalog. requests counter - `consul.client.api.success.catalog_nodes` + `consul.client.api.success.catalog_nodes.<node>` This increments whenever a Consul agent successfully responds to a request to list nodes. requests counter - `consul.client.api.catalog_services` + `consul.client.api.catalog_services.<node>` This increments whenever a Consul agent receives a request to list services from the catalog. requests counter - `consul.client.api.success.catalog_services` + `consul.client.api.success.catalog_services.<node>` This increments whenever a Consul agent successfully responds to a request to list services. requests counter - `consul.client.api.catalog_service_nodes` + `consul.client.api.catalog_service_nodes.<node>` This increments whenever a Consul agent receives a request to list nodes offering a service. requests counter - `consul.client.api.success.catalog_service_nodes` + `consul.client.api.success.catalog_service_nodes.<node>` This increments whenever a Consul agent successfully responds to a request to list nodes offering a service. requests counter - `consul.client.api.catalog_node_services` + `consul.client.api.catalog_node_services.<node>` This increments whenever a Consul agent receives a request to list services registered in a node. requests counter - `consul.client.api.success.catalog_node_services` + `consul.client.api.success.catalog_node_services.<node>` This increments whenever a Consul agent successfully responds to a request to list services registered in a node. requests counter