Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add benchmark memory profiles into report #3951

Merged
merged 6 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/build_and_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ jobs:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: ./tools/github-actions/setup-deps

- name: Setup Graphviz
uses: ts-graphviz/setup-graphviz@v2

# Benchmark
- name: Run Benchmark tests
env:
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/latest_release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ jobs:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: ./tools/github-actions/setup-deps

- name: Setup Graphviz
uses: ts-graphviz/setup-graphviz@v2

# Benchmark
- name: Run Benchmark tests
env:
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ jobs:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: ./tools/github-actions/setup-deps

- name: Setup Graphviz
uses: ts-graphviz/setup-graphviz@v2

# Benchmark
- name: Run Benchmark tests
env:
Expand Down
2 changes: 1 addition & 1 deletion internal/troubleshoot/collect/config_dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,5 +96,5 @@
if includeEds {
reqPath = fmt.Sprintf("%s?include_eds", reqPath)
}
return requestWithPortForwarder(cli, nn, 19000, reqPath)
return RequestWithPortForwarder(cli, nn, 19000, reqPath)

Check warning on line 99 in internal/troubleshoot/collect/config_dump.go

View check run for this annotation

Codecov / codecov/patch

internal/troubleshoot/collect/config_dump.go#L99

Added line #L99 was not covered by tests
}
4 changes: 2 additions & 2 deletions internal/troubleshoot/collect/prometheus_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
reqPath = v
}

data, err := requestWithPortForwarder(cliClient, nn, port, reqPath)
data, err := RequestWithPortForwarder(cliClient, nn, port, reqPath)

Check warning on line 97 in internal/troubleshoot/collect/prometheus_metrics.go

View check run for this annotation

Codecov / codecov/patch

internal/troubleshoot/collect/prometheus_metrics.go#L97

Added line #L97 was not covered by tests
if err != nil {
logs = append(logs, fmt.Sprintf("pod %s/%s is skipped because of err: %v", pod.Namespace, pod.Name, err))
continue
Expand All @@ -121,7 +121,7 @@
return pods.Items, nil
}

func requestWithPortForwarder(cli kube.CLIClient, nn types.NamespacedName, port int, reqPath string) ([]byte, error) {
func RequestWithPortForwarder(cli kube.CLIClient, nn types.NamespacedName, port int, reqPath string) ([]byte, error) {

Check warning on line 124 in internal/troubleshoot/collect/prometheus_metrics.go

View check run for this annotation

Codecov / codecov/patch

internal/troubleshoot/collect/prometheus_metrics.go#L124

Added line #L124 was not covered by tests
fw, err := kube.NewLocalPortForwarder(cli, nn, 0, port)
if err != nil {
return nil, err
Expand Down
15 changes: 15 additions & 0 deletions test/benchmark/suite/render.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ func RenderReport(writer io.Writer, name, description string, titleLevel int, re

writeSection(writer, "Metrics", titleLevel+1, "")
renderMetricsTable(writer, reports)

writeSection(writer, "Profiles", titleLevel+1, "")
renderProfilesTable(writer, "Memory", "heap", titleLevel+2, reports)

return nil
}

Expand Down Expand Up @@ -145,6 +149,17 @@ func renderMetricsTable(writer io.Writer, reports []*BenchmarkReport) {
_ = table.Flush()
}

// renderProfilesTable writes one profiles section (e.g. "Memory") and, beneath
// it, one subsection per benchmark report containing a Markdown image link for
// that report's profile selected by key (e.g. "heap").
func renderProfilesTable(writer io.Writer, target, key string, titleLevel int, reports []*BenchmarkReport) {
	writeSection(writer, target, titleLevel, "")

	for _, rep := range reports {
		// The referenced .png does not exist yet: images are rendered from
		// the .pprof files only after the whole test run has finished, so
		// this link is a placeholder pointing at the future image path.
		img := fmt.Sprintf("![%s-%s](%s.png)", key, rep.Name, rep.ProfilesPath[key])
		writeSection(writer, rep.Name, titleLevel+1, img)
	}
}

// writeSection writes one section in Markdown style, content is optional.
func writeSection(writer io.Writer, title string, level int, content string) {
md := fmt.Sprintf("\n%s %s\n", strings.Repeat("#", level), title)
Expand Down
77 changes: 68 additions & 9 deletions test/benchmark/suite/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,35 +13,51 @@ import (
"context"
"fmt"
"io"
"os"
"path"
"strconv"
"strings"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"

kube "github.com/envoyproxy/gateway/internal/kubernetes"
"github.com/envoyproxy/gateway/internal/troubleshoot/collect"
prom "github.com/envoyproxy/gateway/test/utils/prometheus"
)

type BenchmarkReport struct {
Name string
Result []byte
Metrics map[string]float64 // metricTableHeaderName:metricValue
Name string
Result []byte
Metrics map[string]float64 // metricTableHeaderName:metricValue
ProfilesPath map[string]string // profileKey:profileFilepath
ProfilesOutputDir string

kubeClient kube.CLIClient
promClient *prom.Client
}

func NewBenchmarkReport(name string, kubeClient kube.CLIClient, promClient *prom.Client) *BenchmarkReport {
return &BenchmarkReport{
Name: name,
Metrics: make(map[string]float64),
kubeClient: kubeClient,
promClient: promClient,
// NewBenchmarkReport builds a report holder for one benchmark test run.
// It ensures profilesOutputDir exists before returning; an error from
// directory creation is propagated to the caller.
func NewBenchmarkReport(name, profilesOutputDir string, kubeClient kube.CLIClient, promClient *prom.Client) (*BenchmarkReport, error) {
	if err := createDirIfNotExist(profilesOutputDir); err != nil {
		return nil, err
	}

	report := &BenchmarkReport{
		Name:              name,
		Metrics:           map[string]float64{},
		ProfilesPath:      map[string]string{},
		ProfilesOutputDir: profilesOutputDir,
		kubeClient:        kubeClient,
		promClient:        promClient,
	}
	return report, nil
}

func (r *BenchmarkReport) Collect(ctx context.Context, job *types.NamespacedName) error {
if err := r.GetProfiles(ctx); err != nil {
return err
}

if err := r.GetMetrics(ctx); err != nil {
return err
}
Expand Down Expand Up @@ -109,6 +125,33 @@ func (r *BenchmarkReport) GetMetrics(ctx context.Context) error {
return nil
}

// GetProfiles collects pprof profiles from the running Envoy Gateway pod and
// stores them on disk under ProfilesOutputDir. Currently only the memory heap
// profile is fetched. The path recorded in ProfilesPath has its first path
// component stripped (i.e. it is relative to the parent report output dir) so
// it can be embedded as-is in the rendered Markdown report.
func (r *BenchmarkReport) GetProfiles(ctx context.Context) error {
	pod, err := r.fetchEnvoyGatewayPod(ctx)
	if err != nil {
		return err
	}

	// Fetch the memory heap profile through a local port-forward to the
	// pod's pprof endpoint on port 19000.
	nn := types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}
	heapProf, err := collect.RequestWithPortForwarder(r.kubeClient, nn, 19000, "/debug/pprof/heap")
	if err != nil {
		return err
	}

	heapProfPath := path.Join(r.ProfilesOutputDir, fmt.Sprintf("heap.%s.pprof", r.Name))
	if err = os.WriteFile(heapProfPath, heapProf, 0o600); err != nil {
		return fmt.Errorf("failed to write profiles %s: %w", heapProfPath, err)
	}

	// Record the path without its leading directory component, making it
	// relative to the parent report output dir.
	if _, rest, found := strings.Cut(heapProfPath, "/"); found {
		heapProfPath = rest
	}
	r.ProfilesPath["heap"] = heapProfPath

	return nil
}

// getLogsFromPod scrapes the logs directly from the pod (default container).
func (r *BenchmarkReport) getLogsFromPod(ctx context.Context, pod *types.NamespacedName) ([]byte, error) {
podLogOpts := corev1.PodLogOptions{}
Expand All @@ -129,3 +172,19 @@ func (r *BenchmarkReport) getLogsFromPod(ctx context.Context, pod *types.Namespa

return buf.Bytes(), nil
}

// fetchEnvoyGatewayPod lists the Envoy Gateway control-plane pods (label
// control-plane=envoy-gateway in namespace envoy-gateway-system) and returns
// the first one found, or an error when none exist.
func (r *BenchmarkReport) fetchEnvoyGatewayPod(ctx context.Context) (*corev1.Pod, error) {
	pods, err := r.kubeClient.Kube().CoreV1().
		Pods("envoy-gateway-system").
		List(ctx, metav1.ListOptions{LabelSelector: "control-plane=envoy-gateway"})
	if err != nil {
		return nil, err
	}

	if len(pods.Items) == 0 {
		return nil, fmt.Errorf("failed to get any pods for envoy-gateway")
	}

	// Arbitrarily pick the first pod; this suite does not expect multiple
	// envoy-gateway replicas.
	return &pods.Items[0], nil
}
28 changes: 19 additions & 9 deletions test/benchmark/suite/suite.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,8 @@ func NewBenchmarkTestSuite(client client.Client, options BenchmarkOptions,

// Ensure the report directory exist.
if len(reportDir) > 0 {
if _, err = os.Stat(reportDir); err != nil {
if os.IsNotExist(err) {
if err = os.MkdirAll(reportDir, os.ModePerm); err != nil {
return nil, err
}
} else {
return nil, err
}
if err = createDirIfNotExist(reportDir); err != nil {
return nil, err
}
}

Expand Down Expand Up @@ -232,7 +226,11 @@ func (b *BenchmarkTestSuite) Benchmark(t *testing.T, ctx context.Context, name,

t.Logf("Running benchmark test: %s successfully", name)

report := NewBenchmarkReport(name, b.kubeClient, b.promClient)
report, err := NewBenchmarkReport(name, path.Join(b.ReportSaveDir, "profiles"), b.kubeClient, b.promClient)
if err != nil {
return nil, fmt.Errorf("failed to create benchmark report: %w", err)
}

// Get all the reports from this benchmark test run.
if err = report.Collect(ctx, jobNN); err != nil {
return nil, err
Expand Down Expand Up @@ -392,3 +390,15 @@ func (b *BenchmarkTestSuite) RegisterCleanup(t *testing.T, ctx context.Context,
t.Logf("Clean up complete!")
})
}

func createDirIfNotExist(dir string) (err error) {
if _, err = os.Stat(dir); err != nil {
if os.IsNotExist(err) {
if err = os.MkdirAll(dir, os.ModePerm); err == nil {
return nil
}
}
return err
}
return nil
}
7 changes: 7 additions & 0 deletions tools/make/kube.mk
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ kube-deploy-for-benchmark-test: manifests helm-generate ## Install Envoy Gateway
helm install eg charts/gateway-helm --set deployment.envoyGateway.imagePullPolicy=$(IMAGE_PULL_POLICY) \
--set deployment.envoyGateway.resources.limits.cpu=$(BENCHMARK_CPU_LIMITS) \
--set deployment.envoyGateway.resources.limits.memory=$(BENCHMARK_MEMORY_LIMITS) \
--set config.envoyGateway.admin.enablePprof=true \
arkodg marked this conversation as resolved.
Show resolved Hide resolved
-n envoy-gateway-system --create-namespace --debug --timeout='$(WAIT_TIMEOUT)' --wait --wait-for-jobs
# Install Prometheus-server only
helm install eg-addons charts/gateway-addons-helm --set loki.enabled=false \
Expand Down Expand Up @@ -169,6 +170,12 @@ run-benchmark: install-benchmark-server ## Run benchmark tests
kubectl wait --timeout=$(WAIT_TIMEOUT) -n envoy-gateway-system deployment/envoy-gateway --for=condition=Available
kubectl apply -f test/benchmark/config/gatewayclass.yaml
go test -v -tags benchmark -timeout $(BENCHMARK_TIMEOUT) ./test/benchmark --rps=$(BENCHMARK_RPS) --connections=$(BENCHMARK_CONNECTIONS) --duration=$(BENCHMARK_DURATION) --report-save-dir=$(BENCHMARK_REPORT_DIR)
# render benchmark profiles into image
dot -V
@for profile in $(wildcard test/benchmark/$(BENCHMARK_REPORT_DIR)/profiles/*.pprof); do \
$(call log, "Rendering profile image for: $${profile}"); \
go tool pprof -png $${profile} > $${profile}.png; \
done

.PHONY: install-benchmark-server
install-benchmark-server: ## Install nighthawk server for benchmark test
Expand Down