diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 7a50f30158a..420c5b38c52 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -53,8 +53,8 @@ }, { "ImportPath": "github.com/opencontainers/specs", - "Comment": "v0.1.1", - "Rev": "f6ec7a75c65cd58322ec120f651ccdf465af7973" + "Comment": "v0.1.1-16-g96bcd04", + "Rev": "96bcd043aa8a28f6f64c95ad61329765f01de1ba" }, { "ImportPath": "github.com/seccomp/libseccomp-golang", diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/MAINTAINERS b/Godeps/_workspace/src/github.com/opencontainers/specs/MAINTAINERS index 906510b6001..48e6ee1301e 100644 --- a/Godeps/_workspace/src/github.com/opencontainers/specs/MAINTAINERS +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/MAINTAINERS @@ -6,3 +6,4 @@ Mrunal Patel (@mrunalp) Vincent Batts (@vbatts) Daniel, Dao Quang Minh (@dqminh) Brandon Philips (@philips) +Tianon Gravi (@tianon) diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/config_linux.go b/Godeps/_workspace/src/github.com/opencontainers/specs/config_linux.go index 560d15aa57e..b7bfb2a36f4 100644 --- a/Godeps/_workspace/src/github.com/opencontainers/specs/config_linux.go +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/config_linux.go @@ -1,5 +1,3 @@ -// +build linux - package specs // LinuxSpec is the full specification for linux containers. 
diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config-linux.md b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config-linux.md index f6bf82ee3e8..c4bd31d50d8 100644 --- a/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config-linux.md +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config-linux.md @@ -20,17 +20,17 @@ Also, when a path is specified, a runtime MUST assume that the setup for that pa "path": "/var/run/netns/neta" }, { - "type": "mount", + "type": "mount" }, { - "type": "ipc", + "type": "ipc" }, { - "type": "uts", + "type": "uts" }, { - "type": "user", - }, + "type": "user" + } ] ``` @@ -50,16 +50,16 @@ within the container. Devices is an array specifying the list of devices to be created in the container. Next parameters can be specified: -* type - type of device: 'c', 'b', 'u' or 'p'. More info in `man mknod` -* path - full path to device inside container -* major, minor - major, minor numbers for device. More info in `man mknod`. +* **type** - type of device: `c`, `b`, `u` or `p`. More info in `man mknod` +* **path** - full path to device inside container +* **major, minor** - major, minor numbers for device. More info in `man mknod`. There is special value: `-1`, which means `*` for `device` cgroup setup. -* permissions - cgroup permissions for device. A composition of 'r' - (read), 'w' (write), and 'm' (mknod). -* fileMode - file mode for device file -* uid - uid of device owner -* gid - gid of device owner +* **permissions** - cgroup permissions for device. A composition of `r` + (read), `w` (write), and `m` (mknod). +* **fileMode** - file mode for device file +* **uid** - uid of device owner +* **gid** - gid of device owner ```json "devices": [ @@ -132,7 +132,7 @@ Also known as cgroups, they are used to restrict resource usage for a container cgroups provide controls to restrict cpu, memory, IO, pids and network for the container. 
For more information, see the [kernel cgroups documentation](https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt). -The path to the cgroups can to be specified in the Spec via `cgroupsPath`. +The path to the cgroups can be specified in the Spec via `cgroupsPath`. `cgroupsPath` is expected to be relative to the cgroups mount point. If not specified, cgroups will be created under '/'. Implementations of the Spec can choose to name cgroups in any manner. @@ -146,45 +146,128 @@ The cgroups will be created if they don't exist. `cgroupsPath` can be used to either control the cgroups hierarchy for containers or to run a new process in an existing container. -Optionally, cgroups limits can be specified via `resources`. +You can configure a container's cgroups via the `resources` field of the Linux configuration. +Do not specify `resources` unless limits have to be updated. +For example, to run a new process in an existing container without updating limits, `resources` need not be specified. + +#### Disable out-of-memory killer ```json - "resources": { - "disableOOMKiller": false, - "memory": { - "limit": 0, - "reservation": 0, - "swap": 0, - "kernel": 0, - "swappiness": -1 - }, - "cpu": { - "shares": 0, - "quota": 0, - "period": 0, - "realtimeRuntime": 0, - "realtimePeriod": 0, - "cpus": "", - "mems": "" - }, - "blockIO": { - "blkioWeight": 0, - "blkioWeightDevice": "", - "blkioThrottleReadBpsDevice": "", - "blkioThrottleWriteBpsDevice": "", - "blkioThrottleReadIopsDevice": "", - "blkioThrottleWriteIopsDevice": "" - }, - "hugepageLimits": null, - "network": { - "classId": "", - "priorities": null - } + "disableOOMKiller": false +``` + +#### Memory + +```json + "memory": { + "limit": 0, + "reservation": 0, + "swap": 0, + "kernel": 0, + "swappiness": -1 } ``` -Do not specify `resources` unless limits have to be updated. -For example, to run a new process in an existing container without updating limits, `resources` need not be specified. 
+#### CPU + +```json + "cpu": { + "shares": 0, + "quota": 0, + "period": 0, + "realtimeRuntime": 0, + "realtimePeriod": 0, + "cpus": "", + "mems": "" + } +``` + +#### Block IO Controller + +`blockIO` represents the cgroup subsystem `blkio` which implements the block io controller. +For more information, see the [kernel cgroups documentation about `blkio`](https://www.kernel.org/doc/Documentation/cgroups/blkio-controller.txt). + +The following parameters can be specified to set up the block io controller: + +* **`blkioWeight`** *(uint16, optional)* - specifies per-cgroup weight. This is the default weight of the group on all devices until and unless overridden by per-device rules. The range is from 10 to 1000. + +* **`blkioLeafWeight`** *(uint16, optional)* - equivalent of `blkioWeight` for the purpose of deciding how much weight tasks in the given cgroup have while competing with the cgroup's child cgroups. The range is from 10 to 1000. + +* **`blkioWeightDevice`** *(array, optional)* - specifies the list of devices which will be bandwidth rate limited. The following parameters can be specified per-device: + * **`major, minor`** *(int64, required)* - major, minor numbers for device. More info in `man mknod`. + * **`weight`** *(uint16, optional)* - bandwidth rate for the device, range is from 10 to 1000. + * **`leafWeight`** *(uint16, optional)* - bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only. + + You must specify at least one of `weight` or `leafWeight` in a given entry, and can specify both. + +* **`blkioThrottleReadBpsDevice`**, **`blkioThrottleWriteBpsDevice`**, **`blkioThrottleReadIOPSDevice`**, **`blkioThrottleWriteIOPSDevice`** *(array, optional)* - specify the list of devices which will be IO rate limited. The following parameters can be specified per-device: + * **`major, minor`** *(int64, required)* - major, minor numbers for device. More info in `man mknod`. 
+ * **`rate`** *(uint64, required)* - IO rate limit for the device + +###### Example + +```json + "blockIO": { + "blkioWeight": 0, + "blkioLeafWeight": 0, + "blkioWeightDevice": [ + { + "major": 8, + "minor": 0, + "weight": 500, + "leafWeight": 300 + }, + { + "major": 8, + "minor": 16, + "weight": 500 + } + ], + "blkioThrottleReadBpsDevice": [ + { + "major": 8, + "minor": 0, + "rate": 600 + } + ], + "blkioThrottleWriteIOPSDevice": [ + { + "major": 8, + "minor": 16, + "rate": 300 + } + ] + } +``` + +#### Huge page limits + +```json + "hugepageLimits": [ + { + "pageSize": "2MB", + "limit": 9223372036854771712 + } + ] +``` + +#### Network + +```json + "network": { + "classId": "ClassId", + "priorities": [ + { + "name": "eth0", + "priority": 500 + }, + { + "name": "eth1", + "priority": 1000 + } + ] + } +``` ## Sysctl @@ -200,6 +283,10 @@ For more information, see [the man page](http://man7.org/linux/man-pages/man8/sy ## Rlimits +rlimits allow setting resource limits. +`type` is a string with a value from those defined in [the man page](http://man7.org/linux/man-pages/man2/setrlimit.2.html). +The kernel enforces the `soft` limit for a resource while the `hard` limit acts as a ceiling for that value that could be set by an unprivileged process. + ```json "rlimits": [ { @@ -210,10 +297,6 @@ For more information, see [the man page](http://man7.org/linux/man-pages/man8/sy ] ``` -rlimits allow setting resource limits. -`type` is a string with a value from those defined in [the man page](http://man7.org/linux/man-pages/man2/setrlimit.2.html). -The kernel enforces the `soft` limit for a resource while the `hard` limit acts as a ceiling for that value that could be set by an unprivileged process. - ## SELinux process label SELinux process label specifies the label with which the processes in a container are run. 
diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config_linux.go b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config_linux.go index 97b0d63990f..42e3fa654e3 100644 --- a/Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config_linux.go +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config_linux.go @@ -92,8 +92,10 @@ type Rlimit struct { // HugepageLimit structure corresponds to limiting kernel hugepages type HugepageLimit struct { + // Pagesize is the hugepage size Pagesize string `json:"pageSize"` - Limit int `json:"limit"` + // Limit is the limit of "hugepagesize" hugetlb usage + Limit uint64 `json:"limit"` } // InterfacePriority for network interfaces @@ -104,20 +106,46 @@ type InterfacePriority struct { Priority int64 `json:"priority"` } -// BlockIO for Linux cgroup 'blockio' resource management +// blockIODevice holds major:minor format supported in blkio cgroup +type blockIODevice struct { + // Major is the device's major number. + Major int64 `json:"major"` + // Minor is the device's minor number. 
+ Minor int64 `json:"minor"` +} + +// WeightDevice struct holds a `major:minor weight` pair for blkioWeightDevice +type WeightDevice struct { + blockIODevice + // Weight is the bandwidth rate for the device, range is from 10 to 1000 + Weight uint16 `json:"weight"` + // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + LeafWeight uint16 `json:"leafWeight"` +} + +// ThrottleDevice struct holds a `major:minor rate_per_second` pair +type ThrottleDevice struct { + blockIODevice + // Rate is the IO rate limit per cgroup per device + Rate uint64 `json:"rate"` +} + +// BlockIO for Linux cgroup 'blkio' resource management type BlockIO struct { // Specifies per cgroup weight, range is from 10 to 1000 - Weight int64 `json:"blkioWeight"` + Weight uint16 `json:"blkioWeight"` + // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + LeafWeight uint16 `json:"blkioLeafWeight"` // Weight per cgroup per device, can override BlkioWeight - WeightDevice string `json:"blkioWeightDevice"` + WeightDevice []*WeightDevice `json:"blkioWeightDevice"` // IO read rate limit per cgroup per device, bytes per second - ThrottleReadBpsDevice string `json:"blkioThrottleReadBpsDevice"` - // IO write rate limit per cgroup per divice, bytes per second - ThrottleWriteBpsDevice string `json:"blkioThrottleWriteBpsDevice"` + ThrottleReadBpsDevice []*ThrottleDevice `json:"blkioThrottleReadBpsDevice"` + // IO write rate limit per cgroup per device, bytes per second + ThrottleWriteBpsDevice []*ThrottleDevice `json:"blkioThrottleWriteBpsDevice"` // IO read rate limit per cgroup per device, IO per second - ThrottleReadIOpsDevice string `json:"blkioThrottleReadIopsDevice"` + ThrottleReadIOPSDevice []*ThrottleDevice `json:"blkioThrottleReadIOPSDevice"` // IO write rate limit per cgroup per device, IO per second - ThrottleWriteIOpsDevice 
string `json:"blkioThrottleWriteIopsDevice"` + ThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkioThrottleWriteIOPSDevice"` } // Memory for Linux cgroup 'memory' resource management diff --git a/libcontainer/cgroups/fs/blkio.go b/libcontainer/cgroups/fs/blkio.go index 3b9405542db..db886bcf1d2 100644 --- a/libcontainer/cgroups/fs/blkio.go +++ b/libcontainer/cgroups/fs/blkio.go @@ -32,33 +32,41 @@ func (s *BlkioGroup) Apply(d *data) error { func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.BlkioWeight != 0 { - if err := writeFile(path, "blkio.weight", strconv.FormatInt(cgroup.BlkioWeight, 10)); err != nil { + if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.BlkioWeight), 10)); err != nil { return err } } - if cgroup.BlkioWeightDevice != "" { - if err := writeFile(path, "blkio.weight_device", cgroup.BlkioWeightDevice); err != nil { + if cgroup.BlkioLeafWeight != 0 { + if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.BlkioLeafWeight), 10)); err != nil { return err } } - if cgroup.BlkioThrottleReadBpsDevice != "" { - if err := writeFile(path, "blkio.throttle.read_bps_device", cgroup.BlkioThrottleReadBpsDevice); err != nil { + for _, wd := range cgroup.BlkioWeightDevice { + if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil { + return err + } + if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { + return err + } + } + for _, td := range cgroup.BlkioThrottleReadBpsDevice { + if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { return err } } - if cgroup.BlkioThrottleWriteBpsDevice != "" { - if err := writeFile(path, "blkio.throttle.write_bps_device", cgroup.BlkioThrottleWriteBpsDevice); err != nil { + for _, td := range cgroup.BlkioThrottleWriteBpsDevice { + if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { return err } } - if 
cgroup.BlkioThrottleReadIOpsDevice != "" { - if err := writeFile(path, "blkio.throttle.read_iops_device", cgroup.BlkioThrottleReadIOpsDevice); err != nil { + for _, td := range cgroup.BlkioThrottleReadIOPSDevice { + if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { return err } } - if cgroup.BlkioThrottleWriteIOpsDevice != "" { - if err := writeFile(path, "blkio.throttle.write_iops_device", cgroup.BlkioThrottleWriteIOpsDevice); err != nil { + for _, td := range cgroup.BlkioThrottleWriteIOPSDevice { + if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { return err } } diff --git a/libcontainer/cgroups/fs/blkio_test.go b/libcontainer/cgroups/fs/blkio_test.go index 485ab1ce565..ba312f3a868 100644 --- a/libcontainer/cgroups/fs/blkio_test.go +++ b/libcontainer/cgroups/fs/blkio_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" ) const ( @@ -69,8 +70,6 @@ Total 22061056` 252:0 Async 164 252:0 Total 164 Total 328` - throttleBefore = `8:0 1024` - throttleAfter = `8:0 2048` ) func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) { @@ -112,14 +111,53 @@ func TestBlkioSetWeightDevice(t *testing.T) { const ( weightDeviceBefore = "8:0 400" - weightDeviceAfter = "8:0 500" ) + wd := configs.NewWeightDevice(8, 0, 500, 0) + weightDeviceAfter := wd.WeightString() + + helper.writeFileContents(map[string]string{ + "blkio.weight_device": weightDeviceBefore, + }) + + helper.CgroupData.c.BlkioWeightDevice = []*configs.WeightDevice{wd} + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device") + if err != nil { + t.Fatalf("Failed to parse blkio.weight_device - %s", err) + } + + if value != weightDeviceAfter { + 
t.Fatal("Got the wrong value, set blkio.weight_device failed.") + } +} + +// regression #274 +func TestBlkioSetMultipleWeightDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + const ( + weightDeviceBefore = "8:0 400" + ) + + wd1 := configs.NewWeightDevice(8, 0, 500, 0) + wd2 := configs.NewWeightDevice(8, 16, 500, 0) + // we cannot actually set and check both because a normal ioutil.WriteFile + // to the cgroup file overwrites the whole file content instead + // of updating it as the kernel does. Just checking that the second device + // is present suffices for the test to ensure multiple writes are done. + weightDeviceAfter := wd2.WeightString() + helper.writeFileContents(map[string]string{ "blkio.weight_device": weightDeviceBefore, }) - helper.CgroupData.c.BlkioWeightDevice = weightDeviceAfter + helper.CgroupData.c.BlkioWeightDevice = []*configs.WeightDevice{wd1, wd2} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) @@ -480,11 +518,18 @@ func TestBlkioSetThrottleReadBpsDevice(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() + const ( + throttleBefore = `8:0 1024` + ) + + td := configs.NewThrottleDevice(8, 0, 2048) + throttleAfter := td.String() + helper.writeFileContents(map[string]string{ "blkio.throttle.read_bps_device": throttleBefore, }) - helper.CgroupData.c.BlkioThrottleReadBpsDevice = throttleAfter + helper.CgroupData.c.BlkioThrottleReadBpsDevice = []*configs.ThrottleDevice{td} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) @@ -503,11 +548,18 @@ func TestBlkioSetThrottleWriteBpsDevice(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() + const ( + throttleBefore = `8:0 1024` + ) + + td := configs.NewThrottleDevice(8, 0, 2048) + throttleAfter := td.String() + helper.writeFileContents(map[string]string{ 
"blkio.throttle.write_bps_device": throttleBefore, }) - helper.CgroupData.c.BlkioThrottleWriteBpsDevice = throttleAfter + helper.CgroupData.c.BlkioThrottleWriteBpsDevice = []*configs.ThrottleDevice{td} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) @@ -526,11 +578,18 @@ func TestBlkioSetThrottleReadIOpsDevice(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() + const ( + throttleBefore = `8:0 1024` + ) + + td := configs.NewThrottleDevice(8, 0, 2048) + throttleAfter := td.String() + helper.writeFileContents(map[string]string{ "blkio.throttle.read_iops_device": throttleBefore, }) - helper.CgroupData.c.BlkioThrottleReadIOpsDevice = throttleAfter + helper.CgroupData.c.BlkioThrottleReadIOPSDevice = []*configs.ThrottleDevice{td} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) @@ -549,11 +608,18 @@ func TestBlkioSetThrottleWriteIOpsDevice(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() + const ( + throttleBefore = `8:0 1024` + ) + + td := configs.NewThrottleDevice(8, 0, 2048) + throttleAfter := td.String() + helper.writeFileContents(map[string]string{ "blkio.throttle.write_iops_device": throttleBefore, }) - helper.CgroupData.c.BlkioThrottleWriteIOpsDevice = throttleAfter + helper.CgroupData.c.BlkioThrottleWriteIOPSDevice = []*configs.ThrottleDevice{td} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) diff --git a/libcontainer/cgroups/fs/hugetlb.go b/libcontainer/cgroups/fs/hugetlb.go index 7f192a9ae29..a7d3a8700db 100644 --- a/libcontainer/cgroups/fs/hugetlb.go +++ b/libcontainer/cgroups/fs/hugetlb.go @@ -29,7 +29,7 @@ func (s *HugetlbGroup) Apply(d *data) error { func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error { for _, hugetlb := range cgroup.HugetlbLimit { - if err := writeFile(path, 
strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.Itoa(hugetlb.Limit)); err != nil { + if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil { return err } } diff --git a/libcontainer/cgroups/fs/hugetlb_test.go b/libcontainer/cgroups/fs/hugetlb_test.go index 39c3ee650e7..29acbdc6350 100644 --- a/libcontainer/cgroups/fs/hugetlb_test.go +++ b/libcontainer/cgroups/fs/hugetlb_test.go @@ -33,7 +33,7 @@ func TestHugetlbSetHugetlb(t *testing.T) { ) helper.writeFileContents(map[string]string{ - limit: strconv.Itoa(hugetlbBefore), + limit: strconv.FormatUint(hugetlbBefore, 10), }) helper.CgroupData.c.HugetlbLimit = []*configs.HugepageLimit{ diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go index 7b523da9be8..f930d00694b 100644 --- a/libcontainer/cgroups/systemd/apply_systemd.go +++ b/libcontainer/cgroups/systemd/apply_systemd.go @@ -547,28 +547,37 @@ func joinBlkio(c *configs.Cgroup, pid int) error { if err != nil { return err } - if c.BlkioWeightDevice != "" { - if err := writeFile(path, "blkio.weight_device", c.BlkioWeightDevice); err != nil { + // systemd doesn't directly support this in the dbus properties + if c.BlkioLeafWeight != 0 { + if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(c.BlkioLeafWeight), 10)); err != nil { return err } } - if c.BlkioThrottleReadBpsDevice != "" { - if err := writeFile(path, "blkio.throttle.read_bps_device", c.BlkioThrottleReadBpsDevice); err != nil { + for _, wd := range c.BlkioWeightDevice { + if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil { + return err + } + if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { + return err + } + } + for _, td := range c.BlkioThrottleReadBpsDevice { + if err := writeFile(path, "blkio.throttle.read_bps_device", 
td.String()); err != nil { return err } } - if c.BlkioThrottleWriteBpsDevice != "" { - if err := writeFile(path, "blkio.throttle.write_bps_device", c.BlkioThrottleWriteBpsDevice); err != nil { + for _, td := range c.BlkioThrottleWriteBpsDevice { + if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { return err } } - if c.BlkioThrottleReadIOpsDevice != "" { - if err := writeFile(path, "blkio.throttle.read_iops_device", c.BlkioThrottleReadIOpsDevice); err != nil { + for _, td := range c.BlkioThrottleReadIOPSDevice { + if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { return err } } - if c.BlkioThrottleWriteIOpsDevice != "" { - if err := writeFile(path, "blkio.throttle.write_iops_device", c.BlkioThrottleWriteIOpsDevice); err != nil { + for _, td := range c.BlkioThrottleWriteIOPSDevice { + if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { return err } } diff --git a/libcontainer/configs/blkio_device.go b/libcontainer/configs/blkio_device.go new file mode 100644 index 00000000000..e0f3ca16533 --- /dev/null +++ b/libcontainer/configs/blkio_device.go @@ -0,0 +1,61 @@ +package configs + +import "fmt" + +// blockIODevice holds major:minor format supported in blkio cgroup +type blockIODevice struct { + // Major is the device's major number + Major int64 `json:"major"` + // Minor is the device's minor number + Minor int64 `json:"minor"` +} + +// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair +type WeightDevice struct { + blockIODevice + // Weight is the bandwidth rate for the device, range is from 10 to 1000 + Weight uint16 `json:"weight"` + // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + LeafWeight uint16 `json:"leafWeight"` +} + +// NewWeightDevice returns a configured WeightDevice pointer +func NewWeightDevice(major, minor int64, 
weight, leafWeight uint16) *WeightDevice { + wd := &WeightDevice{} + wd.Major = major + wd.Minor = minor + wd.Weight = weight + wd.LeafWeight = leafWeight + return wd +} + +// WeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) WeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight) +} + +// LeafWeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) LeafWeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight) +} + +// ThrottleDevice struct holds a `major:minor rate_per_second` pair +type ThrottleDevice struct { + blockIODevice + // Rate is the IO rate limit per cgroup per device + Rate uint64 `json:"rate"` +} + +// NewThrottleDevice returns a configured ThrottleDevice pointer +func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice { + td := &ThrottleDevice{} + td.Major = major + td.Minor = minor + td.Rate = rate + return td +} + +// String formats the struct to be writable to the cgroup specific file +func (td *ThrottleDevice) String() string { + return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate) +} diff --git a/libcontainer/configs/cgroup.go b/libcontainer/configs/cgroup.go index 140b530d662..bad86b036a6 100644 --- a/libcontainer/configs/cgroup.go +++ b/libcontainer/configs/cgroup.go @@ -57,23 +57,26 @@ type Cgroup struct { // MEM to use CpusetMems string `json:"cpuset_mems"` + // Specifies per cgroup weight, range is from 10 to 1000. + BlkioWeight uint16 `json:"blkio_weight"` + + // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` + + // Weight per cgroup per device, can override BlkioWeight. + BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` + // IO read rate limit per cgroup per device, bytes per second. 
- BlkioThrottleReadBpsDevice string `json:"blkio_throttle_read_bps_device"` + BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` // IO write rate limit per cgroup per divice, bytes per second. - BlkioThrottleWriteBpsDevice string `json:"blkio_throttle_write_bps_device"` + BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` // IO read rate limit per cgroup per device, IO per second. - BlkioThrottleReadIOpsDevice string `json:"blkio_throttle_read_iops_device"` + BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` // IO write rate limit per cgroup per device, IO per second. - BlkioThrottleWriteIOpsDevice string `json:"blkio_throttle_write_iops_device"` - - // Specifies per cgroup weight, range is from 10 to 1000. - BlkioWeight int64 `json:"blkio_weight"` - - // Weight per cgroup per device, can override BlkioWeight. - BlkioWeightDevice string `json:"blkio_weight_device"` + BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` // set the freeze value for the process Freezer FreezerState `json:"freezer"` diff --git a/libcontainer/configs/hugepage_limit.go b/libcontainer/configs/hugepage_limit.go index 1cce8d09be9..d30216380b5 100644 --- a/libcontainer/configs/hugepage_limit.go +++ b/libcontainer/configs/hugepage_limit.go @@ -5,5 +5,5 @@ type HugepageLimit struct { Pagesize string `json:"page_size"` // usage limit for hugepage. 
- Limit int `json:"limit"` + Limit uint64 `json:"limit"` } diff --git a/spec.go b/spec.go index ff9e5c941b6..014ae73e58a 100644 --- a/spec.go +++ b/spec.go @@ -427,12 +427,28 @@ func createCgroupConfig(name string, spec *specs.LinuxRuntimeSpec, devices []*co c.CpuRtPeriod = r.CPU.RealtimePeriod c.CpusetCpus = r.CPU.Cpus c.CpusetMems = r.CPU.Mems - c.BlkioThrottleReadBpsDevice = r.BlockIO.ThrottleReadBpsDevice - c.BlkioThrottleWriteBpsDevice = r.BlockIO.ThrottleWriteBpsDevice - c.BlkioThrottleReadIOpsDevice = r.BlockIO.ThrottleReadIOpsDevice - c.BlkioThrottleWriteIOpsDevice = r.BlockIO.ThrottleWriteIOpsDevice c.BlkioWeight = r.BlockIO.Weight - c.BlkioWeightDevice = r.BlockIO.WeightDevice + c.BlkioLeafWeight = r.BlockIO.LeafWeight + for _, wd := range r.BlockIO.WeightDevice { + weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, wd.Weight, wd.LeafWeight) + c.BlkioWeightDevice = append(c.BlkioWeightDevice, weightDevice) + } + for _, td := range r.BlockIO.ThrottleReadBpsDevice { + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, td.Rate) + c.BlkioThrottleReadBpsDevice = append(c.BlkioThrottleReadBpsDevice, throttleDevice) + } + for _, td := range r.BlockIO.ThrottleWriteBpsDevice { + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, td.Rate) + c.BlkioThrottleWriteBpsDevice = append(c.BlkioThrottleWriteBpsDevice, throttleDevice) + } + for _, td := range r.BlockIO.ThrottleReadIOPSDevice { + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, td.Rate) + c.BlkioThrottleReadIOPSDevice = append(c.BlkioThrottleReadIOPSDevice, throttleDevice) + } + for _, td := range r.BlockIO.ThrottleWriteIOPSDevice { + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, td.Rate) + c.BlkioThrottleWriteIOPSDevice = append(c.BlkioThrottleWriteIOPSDevice, throttleDevice) + } for _, l := range r.HugepageLimits { c.HugetlbLimit = append(c.HugetlbLimit, &configs.HugepageLimit{ Pagesize: l.Pagesize,