diff --git a/config-linux.md b/config-linux.md index 1927dae55..86ae92d64 100644 --- a/config-linux.md +++ b/config-linux.md @@ -55,28 +55,71 @@ within the container. ### Access to devices -Devices is an array specifying the list of devices from the host to make available in the container. -By providing a device name within the list the runtime should look up the same device on the host's `/dev` -and collect information about the device node so that it can be recreated for the container. The runtime -should not only create the device inside the container but ensure that the root user inside -the container has access rights for the device. +Devices required by the application should be supplied via the bundle filesystems and mounted via [mounts][]. +Bundle authors can create these files using [`mknod`][mknod] or by copying nodes from their local host. +For example: + +```shell +$ mknod --mode a=rw rootfs/dev/random c 1 8 +$ cp --archive /dev/tty rootfs/dev/tty +``` + +## Linux control groups + +Also known as cgroups, they are used to restrict resource usage for a container and handle device access. +For more information, see the [kernel cgroups documentation][cgroups]. +You can configure a container's cgroups via the "resources" field of the Linux configuration. + +### Disable out-of-memory killer + +FIXME + +### Memory + +FIXME + +### CPU + +FIXME + +### Block I/O + +FIXME + +### Devices + +Container-side devices are [mounted from the bundle filesystems][mount-devices]. +Bundle authors can set major and minor numbers, owner IDs, filesystem permissions, etc. by altering those filesystems. +However, you cannot pass cgroup information via the bundle filesystem, so bundle authors that need special device cgroups should use the "devices" field of the resource configuration. +The fields are discussed [in the kernel documentation][cgroups-devices]. +The entries are applied to the container in the order that they are listed in the configuration. 
```json "devices": [ - "null", - "random", - "full", - "tty", - "zero", - "urandom" - ] + { + "allow": false, + "type": "a", + "major": "*", + "minor": "*", + "access": "rwm" + }, + { + "allow": true, + "type": "c", + "major": "1", + "minor": "3", + "access": "mr" + } + ] ``` -## Linux control groups +### Huge page limits + +FIXME + +### Network -Also known as cgroups, they are used to restrict resource usage for a container and handle -device access. cgroups provide controls to restrict cpu, memory, IO, and network for -the container. For more information, see the [kernel cgroups documentation](https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt) +FIXME ## Linux capabilities @@ -150,3 +193,8 @@ rootfsPropagation sets the rootfs's mount propagation. Its value is either slave **TODO:** security profiles +[mounts]: config.md#mount-configuration +[mknod]: http://linux.die.net/man/1/mknod +[cgroups]: https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt +[cgroups-devices]: https://www.kernel.org/doc/Documentation/cgroups/devices.txt +[mount-devices]: #access-to-devices diff --git a/spec_linux.go b/spec_linux.go index 3c82db4b4..9d111544c 100644 --- a/spec_linux.go +++ b/spec_linux.go @@ -26,8 +26,6 @@ type Linux struct { Namespaces []Namespace `json:"namespaces"` // Capabilities are Linux capabilities that are kept for the container Capabilities []string `json:"capabilities"` - // Devices are a list of device nodes that are created and enabled for the container - Devices []string `json:"devices"` // RootfsPropagation is the rootfs mount propagation mode for the container RootfsPropagation string `json:"rootfsPropagation"` } @@ -102,6 +100,21 @@ type BlockIO struct { ThrottleWriteIOpsDevice string `json:"blkioThrottleWriteIopsDevice"` } +// Device is a single allow/deny rule for Linux device cgroup management +type Device struct { + // Whether the device is allowed (true) or denied (false) + Allow bool `json:"allow"` + // a (all), c (char), or b (block). 
'all' means it applies to all + // types and all major and minor numbers + Type string `json:"type"` + // Major number. Either an integer or '*' for all. + Major string `json:"major"` + // Minor number. Either an integer or '*' for all. + Minor string `json:"minor"` + // a composition of r (read), w (write), and m (mknod). + Access string `json:"access"` +} + // Memory for Linux cgroup 'memory' resource management type Memory struct { // Memory limit (in bytes) @@ -152,6 +165,8 @@ type Resources struct { CPU CPU `json:"cpu"` // BlockIO restriction configuration BlockIO BlockIO `json:"blockIO"` + // Devices is the list of device cgroup rules, in the order they are listed + Devices []Device `json:"devices"` // Hugetlb limit (in bytes) HugepageLimits []HugepageLimit `json:"hugepageLimits"` // Network restriction configuration