Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1.1] Fix systemd cgroup driver's Apply (and make CI green again) #3806

Merged
merged 4 commits into from
Apr 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions libcontainer/cgroups/systemd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,32 +353,52 @@ func isUnitExists(err error) bool {
return isDbusError(err, "org.freedesktop.systemd1.UnitExists")
}

func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property) error {
func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error {
statusChan := make(chan string, 1)
retry := true

retry:
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
_, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan)
return err
})
if err == nil {
timeout := time.NewTimer(30 * time.Second)
defer timeout.Stop()

select {
case s := <-statusChan:
close(statusChan)
// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
if s != "done" {
resetFailedUnit(cm, unitName)
return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
}
case <-timeout.C:
if err != nil {
if !isUnitExists(err) {
return err
}
if ignoreExist {
// TODO: remove this hack.
// This is kubelet making sure a slice exists (see
// https://github.com/opencontainers/runc/pull/1124).
return nil
}
if retry {
// In case a unit with the same name exists, this may
// be a leftover failed unit. Reset it, so systemd can
// remove it, and retry once.
resetFailedUnit(cm, unitName)
return errors.New("Timeout waiting for systemd to create " + unitName)
retry = false
goto retry
}
} else if !isUnitExists(err) {
return err
}

timeout := time.NewTimer(30 * time.Second)
defer timeout.Stop()

select {
case s := <-statusChan:
close(statusChan)
// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
if s != "done" {
resetFailedUnit(cm, unitName)
return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
}
case <-timeout.C:
resetFailedUnit(cm, unitName)
return errors.New("Timeout waiting for systemd to create " + unitName)
}

return nil
}

Expand Down
2 changes: 1 addition & 1 deletion libcontainer/cgroups/systemd/v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ func (m *legacyManager) Apply(pid int) error {

properties = append(properties, c.SystemdProps...)

if err := startUnit(m.dbus, unitName, properties); err != nil {
if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
return err
}

Expand Down
2 changes: 1 addition & 1 deletion libcontainer/cgroups/systemd/v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ func (m *unifiedManager) Apply(pid int) error {

properties = append(properties, c.SystemdProps...)

if err := startUnit(m.dbus, unitName, properties); err != nil {
if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
return fmt.Errorf("unable to start unit %q (properties %+v): %w", unitName, properties, err)
}

Expand Down
6 changes: 6 additions & 0 deletions libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,12 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err)
}
if len(pids) != 0 {
if config.Cgroups.Systemd {
// systemd cgroup driver can't add a pid to an
// existing systemd unit and will return an
// error anyway, so let's error out early.
return nil, fmt.Errorf("container's cgroup is not empty: %d process(es) found", len(pids))
}
// TODO: return an error.
logrus.Warnf("container's cgroup is not empty: %d process(es) found", len(pids))
logrus.Warn("DEPRECATED: running container in a non-empty cgroup won't be supported in runc 1.2; https://github.com/opencontainers/runc/issues/3132")
Expand Down
13 changes: 10 additions & 3 deletions tests/integration/cgroups.bats
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ function setup() {
[ "$output" = "ok" ]
}

@test "runc run/create should warn about a non-empty cgroup" {
@test "runc run/create should error/warn about a non-empty cgroup" {
if [[ "$ROOTLESS" -ne 0 ]]; then
requires rootless_cgroup
fi
Expand All @@ -358,14 +358,21 @@ function setup() {
runc run -d --console-socket "$CONSOLE_SOCKET" ct1
[ "$status" -eq 0 ]

# When systemd driver is used, runc can't add PID to an existing unit,
# so runc returns an error. For backward compatibility, we still allow
# such configuration in 1.1, but only when systemd driver is NOT used.
# See https://github.com/opencontainers/runc/issues/3780.
local exp=0
[[ -n "${RUNC_USE_SYSTEMD}" ]] && exp=1

# Run a second container sharing the cgroup with the first one.
runc --debug run -d --console-socket "$CONSOLE_SOCKET" ct2
[ "$status" -eq 0 ]
[ "$status" -eq "$exp" ]
[[ "$output" == *"container's cgroup is not empty"* ]]

# Same but using runc create.
runc create --console-socket "$CONSOLE_SOCKET" ct3
[ "$status" -eq 0 ]
[ "$status" -eq "$exp" ]
[[ "$output" == *"container's cgroup is not empty"* ]]
}

Expand Down