fix: talosctl support and race tests #10042

Merged
1 commit merged on Dec 25, 2024
4 changes: 3 additions & 1 deletion .github/workflows/ci.yaml
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-12-24T15:00:58Z by kres fcff05e.
# Generated on 2024-12-25T15:13:54Z by kres fcff05e.

name: default
concurrency:
@@ -3367,6 +3367,8 @@ jobs:
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_DRIVERS: ide,nvme
QEMU_EXTRA_DISKS_SIZE: "10240"
QEMU_MEMORY_CONTROLPLANES: "4096"
QEMU_MEMORY_WORKERS: "4096"
TAG_SUFFIX: -race
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml'
run: |
4 changes: 3 additions & 1 deletion .github/workflows/integration-qemu-race-cron.yaml
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-11-28T13:53:18Z by kres 232fe63.
# Generated on 2024-12-25T15:13:54Z by kres fcff05e.

name: integration-qemu-race-cron
concurrency:
@@ -94,6 +94,8 @@ jobs:
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_DRIVERS: ide,nvme
QEMU_EXTRA_DISKS_SIZE: "10240"
QEMU_MEMORY_CONTROLPLANES: "4096"
QEMU_MEMORY_WORKERS: "4096"
TAG_SUFFIX: -race
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml'
run: |
2 changes: 2 additions & 0 deletions .kres.yaml
@@ -1283,6 +1283,8 @@ spec:
QEMU_EXTRA_DISKS_SIZE: "10240"
QEMU_EXTRA_DISKS_DRIVERS: "ide,nvme"
WITH_CONFIG_PATCH_WORKER: "@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml"
QEMU_MEMORY_CONTROLPLANES: 4096 # race-enabled Talos consumes lots of RAM
QEMU_MEMORY_WORKERS: 4096
TAG_SUFFIX: -race
IMAGE_REGISTRY: registry.dev.siderolabs.io
- name: save-talos-logs
34 changes: 21 additions & 13 deletions cmd/talosctl/cmd/talos/support.go
@@ -12,6 +12,7 @@ import (
"io"
"os"
"strings"
"sync"
"text/tabwriter"

"github.com/cosi-project/runtime/pkg/resource"
@@ -111,7 +112,7 @@ var supportCmd = &cobra.Command{
}

func collectData(dest *os.File, progress chan bundle.Progress) error {
return WithClient(func(ctx context.Context, c *client.Client) error {
return WithClientNoNodes(func(ctx context.Context, c *client.Client) error {
clientset, err := getKubernetesClient(ctx, c)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to create kubernetes client %s\n", err)
@@ -142,11 +143,7 @@ func collectData(dest *os.File, progress chan bundle.Progress) error {
}

func getKubernetesClient(ctx context.Context, c *client.Client) (*k8s.Clientset, error) {
if len(GlobalArgs.Endpoints) == 0 {
fmt.Fprintln(os.Stderr, "No endpoints set for the cluster, the command might not be able to get kubeconfig")
}

kubeconfig, err := c.Kubeconfig(client.WithNodes(ctx, GlobalArgs.Endpoints...))
kubeconfig, err := c.Kubeconfig(ctx)
if err != nil {
return nil, err
}
@@ -284,6 +281,7 @@ func showProgress(progress <-chan bundle.Progress, errors *supportBundleErrors)
uiprogress.Start()

type nodeProgress struct {
mu sync.Mutex
state string
bar *uiprogress.Bar
}
@@ -298,29 +296,39 @@ func showProgress(progress <-chan bundle.Progress, errors *supportBundleErrors)
ok bool
)

if np, ok = nodes[p.Source]; !ok {
src := p.Source

if _, ok = nodes[p.Source]; !ok {
bar := uiprogress.AddBar(p.Total)
bar = bar.AppendCompleted().PrependElapsed()

src := p.Source

np = &nodeProgress{
state: "initializing...",
bar: bar,
}

bar.AppendFunc(func(b *uiprogress.Bar) string {
return fmt.Sprintf("%s: %s", src, np.state)
})
bar.AppendFunc(
func(src string, np *nodeProgress) func(b *uiprogress.Bar) string {
return func(b *uiprogress.Bar) string {
np.mu.Lock()
defer np.mu.Unlock()

return fmt.Sprintf("%s: %s", src, np.state)
}
}(src, np),
)

bar.Width = 20

nodes[src] = np
} else {
np = nodes[p.Source]
np = nodes[src]
}

np.mu.Lock()
np.state = p.State
np.mu.Unlock()

np.bar.Incr()
}

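The race in the progress bar came from the append callback reading np.state while the main loop wrote it, and from the callback capturing loop-scoped variables. The change above binds src and np explicitly through a wrapper function and guards the state with a mutex. A minimal standalone sketch of the same pattern, with illustrative names (nodeState, formatter) that are not part of this PR:

```go
package main

import (
	"fmt"
	"sync"
)

// nodeState mirrors the idea of nodeProgress: mutable state read by a callback
// that may run on another goroutine, so access is guarded by a mutex.
type nodeState struct {
	mu    sync.Mutex
	state string
}

// formatter returns a closure bound to exactly one src/ns pair; passing them as
// parameters avoids accidentally sharing a loop variable between callbacks.
func formatter(src string, ns *nodeState) func() string {
	return func() string {
		ns.mu.Lock()
		defer ns.mu.Unlock()

		return fmt.Sprintf("%s: %s", src, ns.state)
	}
}

func main() {
	ns := &nodeState{state: "initializing..."}
	render := formatter("172.20.0.2", ns)

	var wg sync.WaitGroup

	wg.Add(1)

	go func() {
		defer wg.Done()

		// Writer side: update state under the same mutex the callback uses.
		ns.mu.Lock()
		ns.state = "collecting logs"
		ns.mu.Unlock()
	}()

	wg.Wait()

	fmt.Println(render()) // 172.20.0.2: collecting logs
}
```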
2 changes: 1 addition & 1 deletion go.mod
@@ -157,7 +157,7 @@ require (
github.com/siderolabs/go-retry v0.3.3
github.com/siderolabs/go-smbios v0.3.3
github.com/siderolabs/go-tail v0.1.1
github.com/siderolabs/go-talos-support v0.1.1
github.com/siderolabs/go-talos-support v0.1.2
github.com/siderolabs/grpc-proxy v0.5.1
github.com/siderolabs/kms-client v0.1.0
github.com/siderolabs/net v0.4.0
4 changes: 2 additions & 2 deletions go.sum
@@ -675,8 +675,8 @@ github.com/siderolabs/go-smbios v0.3.3 h1:rM3UKHQ8in1mqNRkpV75Ls3Wnk6rAhQJVYKUsK
github.com/siderolabs/go-smbios v0.3.3/go.mod h1:kScnr0XSyzLfkRo/ChjITgI0rPRQnIi6PdgbxVCwA9U=
github.com/siderolabs/go-tail v0.1.1 h1:3XeJgd97OHyFAIE7nQEMcRhOfnv7DvXbu0BRKbtT6u8=
github.com/siderolabs/go-tail v0.1.1/go.mod h1:IihAL39acadXHfb5fEAOKK2DaDFIrG2+VD3b2H/ziZ0=
github.com/siderolabs/go-talos-support v0.1.1 h1:g51J0WQssQAycU/0cDliC2l4uX2H02yUs2+fa5pCvHg=
github.com/siderolabs/go-talos-support v0.1.1/go.mod h1:o4woiYS+2J3djCQgyHZRVZQm8XpazQr+XPcTXAZvamo=
github.com/siderolabs/go-talos-support v0.1.2 h1:xKFwT8emzxpmamIe3W35QlmadC54OaPNO9/Y+fL7WwM=
github.com/siderolabs/go-talos-support v0.1.2/go.mod h1:o9zRfWJQhW5j3PQxs7v0jmG4igD4peDatqbAGQFe4oo=
github.com/siderolabs/grpc-proxy v0.5.1 h1:WTZYLMPTZPt43BzEJ02LT9kYA9qAfquWwCezc6NPPYE=
github.com/siderolabs/grpc-proxy v0.5.1/go.mod h1:EQwE87LiWxhiIUPBeWmpjJb9DIWxWID8R6ARtdTC+8A=
github.com/siderolabs/kms-client v0.1.0 h1:rCDWzcDDsNlp6zdyLngOuuhchVILn+vwUQy3tk6rQps=
17 changes: 14 additions & 3 deletions internal/app/machined/pkg/startup/cgroups.go
@@ -14,6 +14,7 @@ import (
"github.com/containerd/cgroups/v3/cgroup1"
"github.com/containerd/cgroups/v3/cgroup2"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/siderolabs/go-debug"
"github.com/siderolabs/go-pointer"
"go.uber.org/zap"

@@ -22,6 +23,16 @@ import (
"github.com/siderolabs/talos/pkg/machinery/constants"
)

func zeroIfRace[T any](v T) T {
if debug.RaceEnabled {
var zeroT T

return zeroT
}

return v
}

// CreateSystemCgroups creates system cgroups.
//
//nolint:gocyclo
@@ -130,7 +141,7 @@ func CreateSystemCgroups(ctx context.Context, log *zap.Logger, rt runtime.Runtim
name: constants.CgroupDashboard,
resources: &cgroup2.Resources{
Memory: &cgroup2.Memory{
Max: pointer.To[int64](constants.CgroupDashboardMaxMemory),
Max: zeroIfRace(pointer.To[int64](constants.CgroupDashboardMaxMemory)),
},
CPU: &cgroup2.CPU{
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupDashboardMillicores))),
@@ -143,7 +154,7 @@
Memory: &cgroup2.Memory{
Min: pointer.To[int64](constants.CgroupApidReservedMemory),
Low: pointer.To[int64](constants.CgroupApidReservedMemory * 2),
Max: pointer.To[int64](constants.CgroupApidMaxMemory),
Max: zeroIfRace(pointer.To[int64](constants.CgroupApidMaxMemory)),
},
CPU: &cgroup2.CPU{
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupApidMillicores))),
@@ -156,7 +167,7 @@
Memory: &cgroup2.Memory{
Min: pointer.To[int64](constants.CgroupTrustdReservedMemory),
Low: pointer.To[int64](constants.CgroupTrustdReservedMemory * 2),
Max: pointer.To[int64](constants.CgroupTrustdMaxMemory),
Max: zeroIfRace(pointer.To[int64](constants.CgroupTrustdMaxMemory)),
},
CPU: &cgroup2.CPU{
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupTrustdMillicores))),
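The new zeroIfRace helper drops the cgroup memory caps when Talos is built with the race detector, since race-instrumented binaries need considerably more RAM than the normal limits allow. A standalone sketch of its semantics; raceEnabled here stands in for github.com/siderolabs/go-debug's RaceEnabled constant so the snippet runs on its own:

```go
package main

import "fmt"

// raceEnabled is a stand-in for debug.RaceEnabled, which the real package
// derives from the race build tag.
const raceEnabled = true

// zeroIfRace returns the zero value of T for race-enabled builds, otherwise v.
func zeroIfRace[T any](v T) T {
	if raceEnabled {
		var zero T

		return zero
	}

	return v
}

func main() {
	limit := int64(192 * 1024 * 1024)

	// With raceEnabled, the returned *int64 is nil, so cgroup2 sets no
	// memory.max at all instead of an artificially low cap.
	fmt.Println(zeroIfRace(&limit) == nil) // true
}
```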
8 changes: 7 additions & 1 deletion pkg/cluster/crashdump.go
@@ -9,6 +9,7 @@ import (
"fmt"
"io"
"os"
"time"

"github.com/siderolabs/gen/xslices"
"github.com/siderolabs/go-talos-support/support"
@@ -33,6 +34,10 @@ func Crashdump(ctx context.Context, cluster provision.Cluster, logWriter io.Writ

defer supportFile.Close() //nolint:errcheck

// limit support bundle generation time
ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()

c, err := client.New(ctx, client.WithDefaultConfig())
if err != nil {
fmt.Fprintf(logWriter, "error creating crashdump: %s\n", err)
@@ -50,7 +55,8 @@ func Crashdump(ctx context.Context, cluster provision.Cluster, logWriter io.Writ
bundle.WithArchiveOutput(supportFile),
bundle.WithTalosClient(c),
bundle.WithNodes(nodes...),
bundle.WithNumWorkers(1),
bundle.WithNumWorkers(4),
bundle.WithLogOutput(io.Discard),
}

kubeclient, err := getKubernetesClient(ctx, c, controlplane)
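The crashdump path now wraps the whole support-bundle run in a five-minute deadline, uses four collection workers, and discards per-node log output, so one unresponsive node cannot stall the dump. A minimal sketch of the deadline pattern, where collect is a hypothetical stand-in for the bundle run:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// collect simulates a collection step that could hang far longer than we are
// willing to wait.
func collect(ctx context.Context) error {
	select {
	case <-time.After(10 * time.Minute):
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func main() {
	// Bound the run: after 5 minutes ctx is cancelled and collect returns.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	if err := collect(ctx); errors.Is(err, context.DeadlineExceeded) {
		fmt.Println("collection aborted:", err)
	}
}
```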