Skip to content

Commit

Permalink
roachtest: fix failing benchmark tests
Browse files Browse the repository at this point in the history
This change fixes three failures detected in benchmark runs after #133035:
1. admission-control/elastic-io failed because of an incorrect flag (`-concurrency` instead of `--concurrency`).
2. tpcc/headroom/n4cpu16 failed because labels were passed incorrectly when exporting openmetrics; the suite label has been removed since it is not required.
3. tpccbench/* failed with a nil pointer error when exporting openmetrics; this PR reverts the openmetrics changes for tpccbench, and a proper fix will follow in a separate PR.

Epic: none
Fixes: #135393
  • Loading branch information
sambhav-jain-16 committed Nov 18, 2024
1 parent f27b402 commit da21b79
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 27 deletions.
1 change: 0 additions & 1 deletion pkg/cmd/roachtest/clusterstats/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,6 @@ func GetOpenmetricsLabelMap(
defaultMap := map[string]string{
"cloud": c.Cloud().String(),
"owner": string(t.Spec().(*registry.TestSpec).Owner),
"suite": t.Spec().(*registry.TestSpec).Suites.String(),
}

// Since the roachtest have / delimiter for subtests
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/roachtest/tests/admission_control_elastic_io.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func registerElasticIO(r registry.Registry) {
m.Go(func(ctx context.Context) error {
dur := " --duration=" + duration.String()
url := fmt.Sprintf(" {pgurl%s}", c.CRDBNodes())
cmd := fmt.Sprintf("./cockroach workload run kv --init %s -concurrency=512 "+
cmd := fmt.Sprintf("./cockroach workload run kv --init %s --concurrency=512 "+
"--splits=1000 --read-percent=0 --min-block-bytes=65536 --max-block-bytes=65536 "+
"--txn-qos=background --tolerate-errors --secure %s %s",
roachtestutil.GetWorkloadHistogramArgs(t, c, labels), dur, url)
Expand Down
46 changes: 21 additions & 25 deletions pkg/cmd/roachtest/tests/tpcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -1683,46 +1683,42 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen
extraFlags += " --method=simple"
}
t.Status(fmt.Sprintf("running benchmark, warehouses=%d", warehouses))
histogramsPath := fmt.Sprintf("%s/warehouses=%d/%s", t.PerfArtifactsDir(), warehouses, roachtestutil.GetBenchmarkMetricsFileName(t))
histogramsPath := fmt.Sprintf("%s/warehouses=%d/stats.json", t.PerfArtifactsDir(), warehouses)
var tenantSuffix string
if b.SharedProcessMT {
tenantSuffix = fmt.Sprintf(":%s", appTenantName)
}

labels := getTpccLabels(warehouses, rampDur, loadDur, nil)

cmd := fmt.Sprintf("./cockroach workload run tpcc --warehouses=%d --active-warehouses=%d "+
"--tolerate-errors --ramp=%s --duration=%s%s %s {pgurl%s%s}",
"--tolerate-errors --ramp=%s --duration=%s%s --histograms=%s {pgurl%s%s}",
b.LoadWarehouses(c.Cloud()), warehouses, rampDur,
loadDur, extraFlags, roachtestutil.GetWorkloadHistogramArgs(t, c, labels), sqlGateways, tenantSuffix)
loadDur, extraFlags, histogramsPath, sqlGateways, tenantSuffix)
err := c.RunE(ctx, option.WithNodes(group.LoadNodes), cmd)
loadDone <- timeutil.Now()
if err != nil {
// NB: this will let the line search continue at a lower warehouse
// count.
return errors.Wrapf(err, "error running tpcc load generator")
}
if !t.ExportOpenmetrics() {
roachtestHistogramsPath := filepath.Join(resultsDir, fmt.Sprintf("%d.%d-stats.json", warehouses, groupIdx))
if err := c.Get(
ctx, t.L(), histogramsPath, roachtestHistogramsPath, group.LoadNodes,
); err != nil {
// NB: this will let the line search continue. The reason we do this
// is because it's conceivable that we made it here, but a VM just
// froze up on us. The next search iteration will handle this state.
return err
}
snapshots, err := histogram.DecodeSnapshots(roachtestHistogramsPath)
if err != nil {
// If we got this far, and can't decode data, it's not a case of
// overload but something that deserves failing the whole test.
t.Fatal(err)
}
result := tpcc.NewResultWithSnapshots(warehouses, 0, snapshots)
resultChan <- result
return nil

roachtestHistogramsPath := filepath.Join(resultsDir, fmt.Sprintf("%d.%d-stats.json", warehouses, groupIdx))
if err := c.Get(
ctx, t.L(), histogramsPath, roachtestHistogramsPath, group.LoadNodes,
); err != nil {
// NB: this will let the line search continue. The reason we do this
// is because it's conceivable that we made it here, but a VM just
// froze up on us. The next search iteration will handle this state.
return err
}
snapshots, err := histogram.DecodeSnapshots(roachtestHistogramsPath)
if err != nil {
// If we got this far, and can't decode data, it's not a case of
// overload but something that deserves failing the whole test.
t.Fatal(err)
}
result := tpcc.NewResultWithSnapshots(warehouses, 0, snapshots)
resultChan <- result
return nil

})
}
failErr := m.WaitE()
Expand Down

0 comments on commit da21b79

Please sign in to comment.