Skip to content

Commit

Permalink
recommender: add metrics for recommender http actions (#227)
Browse files Browse the repository at this point in the history
* recommender: add metrics for recommender http actions

```
http_client_request_duration_seconds_bucket{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="0.1"} 1
http_client_request_duration_seconds_bucket{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="0.3"} 1
http_client_request_duration_seconds_bucket{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="0.6"} 1
http_client_request_duration_seconds_bucket{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="1"} 1
http_client_request_duration_seconds_bucket{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="3"} 1
http_client_request_duration_seconds_bucket{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="6"} 1
http_client_request_duration_seconds_bucket{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="9"} 1
http_client_request_duration_seconds_bucket{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="20"} 1
http_client_request_duration_seconds_bucket{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="+Inf"} 1
http_client_request_duration_seconds_sum{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect"} 0.000287125
http_client_request_duration_seconds_count{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect"} 1
http_client_request_duration_seconds_bucket{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="0.1"} 1
http_client_request_duration_seconds_bucket{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="0.3"} 1
http_client_request_duration_seconds_bucket{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="0.6"} 1
http_client_request_duration_seconds_bucket{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="1"} 1
http_client_request_duration_seconds_bucket{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="3"} 1
http_client_request_duration_seconds_bucket{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="6"} 1
http_client_request_duration_seconds_bucket{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="9"} 1
http_client_request_duration_seconds_bucket{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="20"} 1
http_client_request_duration_seconds_bucket{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect",le="+Inf"} 1
http_client_request_duration_seconds_sum{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect"} 0.003237708
http_client_request_duration_seconds_count{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect"} 1
http_client_requests_inflight 0
http_client_requests_total{code="200",method="post",recommender="http://localhost:8089/autoscaling/redirect"} 1
http_client_requests_total{code="308",method="post",recommender="http://localhost:8089/autoscaling/redirect"} 1
```

* Update licenses

* Move metrics, restore main

* Clean up the recommender code a bit

* Remove recoverPanic

* Update the doc
  • Loading branch information
iksaif authored Nov 6, 2024
1 parent fe977a9 commit cf7a732
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 14 deletions.
4 changes: 4 additions & 0 deletions LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,15 @@ core,github.com/google/uuid,BSD-3-Clause
core,github.com/imdario/mergo,BSD-3-Clause
core,github.com/josharian/intern,MIT
core,github.com/json-iterator/go,MIT
core,github.com/klauspost/compress,Apache-2.0
core,github.com/klauspost/compress/internal/snapref,BSD-3-Clause
core,github.com/klauspost/compress/zstd/internal/xxhash,MIT
core,github.com/mailru/easyjson,MIT
core,github.com/modern-go/concurrent,Apache-2.0
core,github.com/modern-go/reflect2,Apache-2.0
core,github.com/munnerz/goautoneg,BSD-3-Clause
core,github.com/pkg/errors,BSD-2-Clause
core,github.com/prometheus/client_golang/internal/github.com/golang/gddo/httputil,BSD-3-Clause
core,github.com/prometheus/client_golang/prometheus,Apache-2.0
core,github.com/prometheus/client_model/go,Apache-2.0
core,github.com/prometheus/common,Apache-2.0
Expand Down
12 changes: 11 additions & 1 deletion cmd/skeleton-recommender/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ func int32Ptr(i int32) *int32 {
// Static autoscaler that always recommends to scale up by 1 replica, can be tried with
// curl -X POST -d '{"state": {"currentReplicas": 1}, "targets": [{"type": "cpu", "targetValue": 0.5}]}' http://localhost:8089/autoscaling
func autoscaling(w http.ResponseWriter, r *http.Request) {
log.Printf("Handling %s %s %s bytes\n", r.Method, r.URL.Path, r.Header.Get("Content-Length"))

body, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, "Failed to read request body", http.StatusInternalServerError)
Expand All @@ -38,6 +40,8 @@ func autoscaling(w http.ResponseWriter, r *http.Request) {
return
}

log.Printf("< %+v\n", request)

var currentReplicas = int32(0)

if request.GetState() != nil {
Expand Down Expand Up @@ -70,17 +74,23 @@ func autoscaling(w http.ResponseWriter, r *http.Request) {
return
}

log.Printf("< %+v\n", request)
log.Printf("> %+v\n\n", response)
}

// autoscalingRedirect answers any request with a permanent (308) redirect to
// /autoscaling, logging the path that was originally requested.
func autoscalingRedirect(w http.ResponseWriter, r *http.Request) {
	requested := r.URL.Path
	log.Printf("Redirecting %s to /autoscaling", requested)

	const status = http.StatusPermanentRedirect
	http.Redirect(w, r, "/autoscaling", status)
}

func main() {
flags := pflag.NewFlagSet("skeleton-recommender", pflag.ExitOnError)
flags.String("addr", ":8089", "Address to listen on")

pflag.CommandLine = flags

http.HandleFunc("/autoscaling", autoscaling)
http.HandleFunc("/autoscaling/redirect", autoscalingRedirect)

err := http.ListenAndServe(flags.Lookup("addr").Value.String(), nil)
if err != nil {
Expand Down
23 changes: 23 additions & 0 deletions controllers/datadoghq/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,26 @@ var (
},
append(extraPromLabels, wpaNamePromLabel, wpaNamespacePromLabel, resourceNamespacePromLabel),
)
// HTTP client metrics for outgoing recommender calls.
//
// The variable label is named "recommender" because instrumentRoundTripper
// curries these vectors with prometheus.Labels{"recommender": ...}; currying
// (or With) on a label name the vector does not declare panics at runtime.
// After currying, only "method" and "code" remain, which are filled in by the
// promhttp round-tripper instrumentation.

// requestDuration tracks the latency of HTTP requests, per recommender.
requestDuration = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:    "http_client_request_duration_seconds",
		Help:    "Tracks the latencies for HTTP requests.",
		Buckets: []float64{0.1, 0.3, 0.6, 1, 3, 6, 9, 20},
	},
	[]string{"recommender", "method", "code"},
)

// requestsTotal counts HTTP requests, per recommender.
requestsTotal = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name: "http_client_requests_total",
		Help: "Tracks the number of HTTP requests.",
	}, []string{"recommender", "method", "code"},
)

// responseInflight reports the number of in-progress client requests, per recommender.
responseInflight = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Name: "http_client_requests_inflight",
		Help: "Tracks the number of client requests currently in progress.",
	}, []string{"recommender"},
)
)

func init() {
Expand All @@ -276,6 +296,9 @@ func init() {
sigmetrics.Registry.MustRegister(replicaMax)
sigmetrics.Registry.MustRegister(dryRun)
sigmetrics.Registry.MustRegister(labelsInfo)
sigmetrics.Registry.MustRegister(requestDuration)
sigmetrics.Registry.MustRegister(requestsTotal)
sigmetrics.Registry.MustRegister(responseInflight)
}

func cleanupAssociatedMetrics(wpa *datadoghqv1alpha1.WatermarkPodAutoscaler, onlyMetricsSpecific bool) {
Expand Down
52 changes: 43 additions & 9 deletions controllers/datadoghq/recommender.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"net/url"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/types/known/structpb"

Expand All @@ -37,6 +39,7 @@ type RecommenderClient interface {
}

// RecommenderClientImpl is the RecommenderClient implementation that talks to
// recommenders through an *http.Client.
type RecommenderClientImpl struct {
// client is the base HTTP client supplied to NewRecommenderClient; it is
// shallow-copied and given an instrumented Transport by instrumentedClient.
client *http.Client
}

type ReplicaRecommendationRequest struct {
Expand All @@ -58,13 +61,45 @@ type ReplicaRecommendationResponse struct {
Details string
}

func NewRecommenderClient() RecommenderClient {
return &RecommenderClientImpl{}
// NewRecommenderClient returns a new RecommenderClient built on top of the
// given http.Client.
//
// A nil client is treated as http.DefaultClient. The supplied client is never
// modified: the recommender client keeps its own shallow copy, so defaulting a
// nil Transport to http.DefaultTransport does not leak into the caller's client
// (which is typically the process-wide http.DefaultClient).
func NewRecommenderClient(client *http.Client) RecommenderClient {
	if client == nil {
		client = http.DefaultClient
	}

	// Shallow copy so the Transport assignment below stays private to us.
	base := *client
	if base.Transport == nil {
		// instrumentRoundTripper wraps the Transport, so it must be non-nil.
		base.Transport = http.DefaultTransport
	}

	return &RecommenderClientImpl{
		client: &base,
	}
}

// instrumentedClient builds a per-recommender HTTP client.
//
// The result is a shallow copy of the stored client whose Transport is the
// stored client's Transport wrapped by instrumentRoundTripper for the given
// recommender; the stored client itself is left untouched.
func (r *RecommenderClientImpl) instrumentedClient(recommender string) *http.Client {
	instrumented := new(http.Client)
	*instrumented = *r.client
	instrumented.Transport = instrumentRoundTripper(recommender, r.client.Transport)
	return instrumented
}

// instrumentRoundTripper wraps transport with the promhttp duration, in-flight
// and counter instrumentation (innermost to outermost), with every metric
// bound to the given recommender label value.
func instrumentRoundTripper(recommender string, transport http.RoundTripper) http.RoundTripper {
	byRecommender := prometheus.Labels{"recommender": recommender}

	// Bind the recommender label first, in the same order the nested call
	// expression would evaluate its arguments.
	counter := requestsTotal.MustCurryWith(byRecommender)
	inflight := responseInflight.With(byRecommender)
	duration := requestDuration.MustCurryWith(byRecommender)

	timed := promhttp.InstrumentRoundTripperDuration(duration, transport)
	tracked := promhttp.InstrumentRoundTripperInFlight(inflight, timed)
	return promhttp.InstrumentRoundTripperCounter(counter, tracked)
}

// GetReplicaRecommendation returns a recommendation for the number of replicas to scale to
// based on the given ReplicaRecommendationRequest.
// Current it supports http based recommendation service, but we need to implement grpc services too.
//
// Currently, it supports http based recommendation service, but we need to implement grpc services too.
func (r *RecommenderClientImpl) GetReplicaRecommendation(request *ReplicaRecommendationRequest) (*ReplicaRecommendationResponse, error) {
reco := request.Recommender
if reco == nil {
Expand Down Expand Up @@ -93,13 +128,17 @@ func (r *RecommenderClientImpl) GetReplicaRecommendation(request *ReplicaRecomme
// TODO: We might want to make the timeout configurable later.
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()

client := r.instrumentedClient(request.Recommender.URL)

httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, u.String(), bytes.NewReader(payload))
httpReq.Header.Set("Content-Type", "application/json")
httpReq.Header.Set("User-Agent", "wpa-controller")

if err != nil {
return &ReplicaRecommendationResponse{}, fmt.Errorf("error creating request: %w", err)
}
resp, err := http.DefaultClient.Do(httpReq)
resp, err := client.Do(httpReq)

defer func() {
if resp != nil && resp.Body != nil {
Expand Down Expand Up @@ -194,8 +233,3 @@ func buildReplicaRecommendationResponse(reply *autoscaling.WorkloadRecommendatio
}

var _ RecommenderClient = &RecommenderClientImpl{}

type RecommenderClientMock struct {
ReturnedResponse ReplicaRecommendationResponse
Error error
}
5 changes: 5 additions & 0 deletions controllers/datadoghq/recommender_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,8 @@ func (m *RecommenderClientMock) GetReplicaRecommendation(request *ReplicaRecomme
}

var _ RecommenderClient = &RecommenderClientMock{}

// RecommenderClientMock is a test double that satisfies RecommenderClient.
type RecommenderClientMock struct {
// ReturnedResponse is the canned response; presumably handed back by
// GetReplicaRecommendation — confirm against the method body.
ReturnedResponse ReplicaRecommendationResponse
// Error is the canned error; presumably returned alongside the response —
// confirm against the method body.
Error error
}
3 changes: 2 additions & 1 deletion controllers/datadoghq/watermarkpodautoscaler_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"errors"
"fmt"
"math"
"net/http"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -956,7 +957,7 @@ func (r *WatermarkPodAutoscalerReconciler) SetupWithManager(mgr ctrl.Manager, wo
nil,
external_metrics.NewForConfigOrDie(podConfig),
)
rc := NewRecommenderClient()
rc := NewRecommenderClient(http.DefaultClient)
var stop chan struct{}
pl := initializePodInformer(podConfig, stop)

Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ require (
github.com/go-logr/logr v1.4.2
github.com/onsi/ginkgo v1.16.5
github.com/onsi/gomega v1.34.1
github.com/prometheus/client_golang v1.19.1
github.com/prometheus/client_golang v1.20.5
github.com/prometheus/client_model v0.6.1
github.com/spf13/cobra v1.8.1
github.com/spf13/pflag v1.0.5
Expand Down Expand Up @@ -70,6 +70,7 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/moby/term v0.5.0 // indirect
Expand Down
8 changes: 6 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
Expand All @@ -165,6 +167,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0=
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE=
github.com/lyft/protoc-gen-star/v2 v2.0.1/go.mod h1:RcCdONR2ScXaYnQC5tUzxzlpA3WVYF7/opLeUgcQs/o=
Expand Down Expand Up @@ -208,8 +212,8 @@ github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZ
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
Expand Down

0 comments on commit cf7a732

Please sign in to comment.