From 8d139b10e8b05be63906b50a2f2de1db31e8a7ec Mon Sep 17 00:00:00 2001 From: Wilfred Spiegelenburg Date: Tue, 12 Nov 2024 14:06:26 -0600 Subject: [PATCH] [YUNIKORN-2965] Move statedump and stack REST to debug endpoint (#992) The REST calls to get a statedump and the stacks of the system are exposed on the wrong endpoint. They belong in /debug and not in /ws/v1. Both are for troubleshooting only and the content is not stable. Placing a 301 redirect on the old endpoints for clients to follow. The proxy build into the web UI does not proxy the debug endpoints. Closes: #992 Signed-off-by: Craig Condit --- pkg/webservice/handlers.go | 18 +++++ pkg/webservice/handlers_test.go | 27 ++++++++ pkg/webservice/routes.go | 100 ++++++++++++++++------------ pkg/webservice/webservice_test.go | 107 ++++++++++++++++++++++++++++++ 4 files changed, 209 insertions(+), 43 deletions(-) create mode 100644 pkg/webservice/webservice_test.go diff --git a/pkg/webservice/handlers.go b/pkg/webservice/handlers.go index 1180b32ee..d06d62340 100644 --- a/pkg/webservice/handlers.go +++ b/pkg/webservice/handlers.go @@ -66,6 +66,9 @@ const ( AppStateActive = "active" AppStateRejected = "rejected" AppStateCompleted = "completed" + + WSBase = "/ws/v1" + DebugBase = "/debug" ) var allowedActiveStatusMsg string @@ -108,6 +111,21 @@ func init() { maxRESTResponseSize.Store(configs.DefaultRESTResponseSize) } +// redirectDebug redirect calls that used to be part of "/ws/v1" to "/debug" +// Moving the URl permanently for requests that should not have been part of the standard list +func redirectDebug(w http.ResponseWriter, r *http.Request) { + code := http.StatusMovedPermanently + // replace the path: we know we have the right URL as the router would not have called this + r.URL.Path = strings.Replace(r.URL.Path, WSBase, DebugBase, 1) + redirect := r.URL.String() + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.Header().Set("Location", redirect) + w.WriteHeader(code) + // do as recommended in the RFC 7321, 6.4.2 + body := "" + http.StatusText(code) + ".\n" + _, _ = fmt.Fprintln(w, body) +} + func getStackInfo(w http.ResponseWriter, r *http.Request) { writeHeaders(w) var stack = func() []byte { diff --git a/pkg/webservice/handlers_test.go b/pkg/webservice/handlers_test.go index e3465b891..4ffb231cf 100644 --- a/pkg/webservice/handlers_test.go +++ b/pkg/webservice/handlers_test.go @@ -2960,6 +2960,33 @@ func TestGetPartitionRuleHandler(t *testing.T) { assert.Equal(t, partitionRules[3].Name, types.Recovery) } +func TestRedirectDebugHandler(t *testing.T) { + NewWebApp(&scheduler.ClusterContext{}, nil) + base := "http://yunikorn.host.com:9080" + code := http.StatusMovedPermanently + tests := []struct { + name string + reqURL string + redirect string + }{ + {"statedump", "/ws/v1/fullstatedump", "/debug/fullstatedump"}, + {"stacks", "/ws/v1/stack", "/debug/stack"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req, err := createRequest(t, base+tt.reqURL, map[string]string{}) + assert.NilError(t, err, httpRequestError) + resp := &MockResponseWriter{} + redirectDebug(resp, req) + assert.Equal(t, resp.statusCode, code, "expected moved permanently status") + loc := resp.Header().Get("Location") + assert.Assert(t, loc != "", "expected redirect header to be set") + assert.Assert(t, strings.HasSuffix(loc, tt.redirect), "expected redirect to debug") + assert.Assert(t, strings.Contains(string(resp.outputBytes), http.StatusText(code))) + }) + } +} + func TestSetMaxRESTResponseSize(t *testing.T) { current := configs.GetConfigMap() defer configs.SetConfigMap(current) diff --git a/pkg/webservice/routes.go b/pkg/webservice/routes.go index 3a03ff5b4..50c5e20a3 100644 --- a/pkg/webservice/routes.go +++ b/pkg/webservice/routes.go @@ -40,40 +40,26 @@ var webRoutes = routes{ "/ws/v1/clusters", getClusterInfo, }, - - // endpoint to retrieve goroutines info route{ - "Scheduler", - "GET", - "/ws/v1/stack", - getStackInfo, - }, - - // endpoint to retrieve server metrics - route{ - "Scheduler", + "Cluster", "GET", "/ws/v1/metrics", getMetrics, }, - - // endpoint to retrieve the current conf route{ - "Scheduler", + "Cluster", "GET", "/ws/v1/config", getClusterConfig, }, - - // endpoint to validate conf route{ - "Scheduler", + "Cluster", "POST", "/ws/v1/validate-conf", validateConf, }, - // endpoint to retrieve historical data + // endpoints to retrieve general scheduler info route{ "Scheduler", "GET", @@ -87,7 +73,7 @@ var webRoutes = routes{ getContainerHistory, }, route{ - "Partitions", + "Scheduler", "GET", "/ws/v1/partitions", getPartitions, @@ -176,12 +162,6 @@ var webRoutes = routes{ "/ws/v1/partition/:partition/usage/group/:group", getGroupResourceUsage, }, - route{ - "Scheduler", - "GET", - "/ws/v1/fullstatedump", - getFullStateDump, - }, route{ "Scheduler", "GET", @@ -194,10 +174,37 @@ var webRoutes = routes{ "/ws/v1/events/stream", getStream, }, - // endpoint to retrieve CPU, Memory profiling data, - // this works with pprof tool. By default, pprof endpoints - // are only registered to http.DefaultServeMux. Here, we - // need to explicitly register all handlers. + route{ + "Scheduler", + "GET", + "/ws/v1/scheduler/healthcheck", + checkHealthStatus, + }, + route{ + "Scheduler", + "GET", + "/ws/v1/scheduler/node-utilizations", + getNodeUtilisations, + }, + + // endpoints to retrieve debug info + // + // These endpoints are not to be proxied by the web server. The content is not for general consumption. + // The content is not considered stable and can change from release to release. + // All pprof endpoints provide profiling data in the format expected by the pprof visualization tool. + // We need to explicitly register all handlers as we do not use the DefaultServeMux + route{ + Name: "System", + Method: "GET", + Pattern: "/debug/stack", + HandlerFunc: getStackInfo, + }, + route{ + Name: "System", + Method: "GET", + Pattern: "/debug/fullstatedump", + HandlerFunc: getFullStateDump, + }, route{ Name: "System", Method: "GET", @@ -264,24 +271,31 @@ var webRoutes = routes{ Pattern: "/debug/pprof/trace", HandlerFunc: pprof.Trace, }, - // endpoint to check health status + + // Deprecated REST calls + // + // Replaced with /ws/v1/scheduler/node-utilizations as part of YuniKorn 1.5 + // Remove as part of YuniKorn 1.8 route{ - "Scheduler", - "GET", - "/ws/v1/scheduler/healthcheck", - checkHealthStatus, + Name: "Scheduler", + Method: "GET", + Pattern: "/ws/v1/scheduler/node-utilization", + HandlerFunc: getNodeUtilisation, }, - // Deprecated - To be removed in next major release. Replaced with /ws/v1/scheduler/node-utilizations + // Permanently moved to the debug endpoint as part of YuniKorn 1.7 + // Remove redirect in YuniKorn 1.10 route{ - "Scheduler", - "GET", - "/ws/v1/scheduler/node-utilization", - getNodeUtilisation, + Name: "Scheduler", + Method: "GET", + Pattern: "/ws/v1/stack", + HandlerFunc: redirectDebug, }, + // Permanently moved to the debug endpoint as part of YuniKorn 1.7 + // Remove redirect in YuniKorn 1.10 route{ - "Scheduler", - "GET", - "/ws/v1/scheduler/node-utilizations", - getNodeUtilisations, + Name: "Scheduler", + Method: "GET", + Pattern: "/ws/v1/fullstatedump", + HandlerFunc: redirectDebug, }, } diff --git a/pkg/webservice/webservice_test.go b/pkg/webservice/webservice_test.go new file mode 100644 index 000000000..1b723f4a0 --- /dev/null +++ b/pkg/webservice/webservice_test.go @@ -0,0 +1,107 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package webservice + +import ( + "fmt" + "io" + "net/http" + "testing" + + "gotest.tools/v3/assert" + + "github.com/apache/yunikorn-core/pkg/metrics/history" + "github.com/apache/yunikorn-core/pkg/scheduler" +) + +func Test_RedirectDebugHandler(t *testing.T) { + defer ResetIMHistory() + s := NewWebApp(&scheduler.ClusterContext{}, history.NewInternalMetricsHistory(5)) + s.StartWebApp() + defer func(s *WebService) { + err := s.StopWebApp() + if err != nil { + t.Fatal("failed to stop webapp") + } + }(s) + base := "http://localhost:9080" + tests := []struct { + name string + reqURL string + redirect string + }{ + {"statedump", "/ws/v1/fullstatedump", "/debug/fullstatedump"}, + {"stacks", "/ws/v1/stack", "/debug/stack"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := &http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if req.URL.Path != tt.redirect { + return fmt.Errorf("expected redirect to '%s' got '%s'", tt.redirect, req.URL.Path) + } + return nil + }, + } + resp, err := client.Get(base + tt.reqURL) + assert.NilError(t, err, "unexpected error returned") + _ = resp.Body.Close() // not interested in the error + assert.Equal(t, resp.StatusCode, http.StatusOK, "expected OK after redirect") + }) + } +} + +func Test_RouterHandling(t *testing.T) { + s := NewWebApp(&scheduler.ClusterContext{}, nil) + s.StartWebApp() + defer func(s *WebService) { + err := s.StopWebApp() + if err != nil { + t.Fatal("failed to stop webapp") + } + }(s) + base := "http://localhost:9080" + client := &http.Client{} + // unsupported POST + resp, err := client.Post(base+"/ws/v1/clusters", "application/json; charset=UTF-8", nil) + assert.NilError(t, err, "unexpected error returned") + assert.Equal(t, resp.StatusCode, http.StatusMethodNotAllowed, "expected method not allowed") + var body []byte + body, err = io.ReadAll(resp.Body) + _ = resp.Body.Close() // not interested in the error + assert.NilError(t, err, "unexpected error reading body") + assert.Assert(t, body != nil, "expected body with status text") + resp, err = client.Head(base + "/ws/v1/clusters") + assert.NilError(t, err, "unexpected error returned") + body, err = io.ReadAll(resp.Body) + _ = resp.Body.Close() + assert.NilError(t, err, "unexpected error reading body") + assert.Assert(t, body != nil, "expected body with status text") + assert.Equal(t, resp.StatusCode, http.StatusMethodNotAllowed, "expected method not allowed") + // get with trailing slash + resp, err = client.Get(base + "/ws/v1/clusters/") + assert.NilError(t, err, "unexpected error returned") + _ = resp.Body.Close() + assert.Equal(t, resp.StatusCode, http.StatusOK, "expected OK") + // get with case difference + resp, err = client.Get(base + "/ws/v1/CLUSTERS") + assert.NilError(t, err, "unexpected error returned") + _ = resp.Body.Close() + assert.Equal(t, resp.StatusCode, http.StatusOK, "expected OK") +}