#!/bin/bash
# Forked from jpetazzo/dind.
# startdocker: start a Docker daemon in the current container. Useful when
# using systems like the Gitlab Runner's Kubernetes executor that can't use
# ENTRYPOINT. If PORT is set, starts the daemon in the foreground. Else, starts
# the daemon in the background. Passes along DOCKER_DAEMON_ARGS to the daemon,
# and sends the log to a file if LOG is set to "file".
#
# When cgroups v2 are in use, and we are found to be running at the root of the
# visible cgroup hierarchy (i.e. in a plain Kubernetes container not enhanced
# with something like
# https://github.com/k3d-io/k3d/pull/579/files#diff-71e760f22ea8192fe65294b2330d4bd29fc3888fbf283ba4ac69fda1af3878dd),
# then we try to turn off all the confinement domains, bundle all the processes
# in the cgroup into a new child cgroup, and turn them back on again, so that
# Docker can actually use cgroups v2 to further confine sub-containers.
# Otherwise, Docker tries to make a cgroup directly under the container's root
# one, and if it tries to enable e.g. a memory limit on it, cgroups v2 refuses
# because running processes aren't allowed in cgroups with resource-limited
# children, because that makes scheduler correctness hard. See
# https://github.com/docker/for-mac/issues/6288#issuecomment-1250799498 and
# also the documentation for cgroups v2 at
# https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html.
#
# To *stop* Docker, use stopdocker
#
# If running without PORT, waits for Docker to be ready, and automatically
# succeeds if docker is already available.
if [ ! "$PORT" ] && [ -e /var/run/docker.sock ]
then
    # Someone must have already started Docker.
    echo 'Not starting docker: /var/run/docker.sock already exists' >&2
else
    # Ensure that all nodes in /dev/mapper correspond to mapped devices
    # currently loaded by the device-mapper kernel driver.
    dmsetup mknodes

    # First, make sure that cgroups are mounted correctly.
    CGROUP=/sys/fs/cgroup

    # Default LOG to "stdio" when the caller did not set it. The `$` is
    # required: without it the line is a no-op and LOG is never assigned.
    : "${LOG:=stdio}"

    [ -d "$CGROUP" ] || mkdir "$CGROUP"

    mountpoint -q "$CGROUP" ||
        mount -n -t tmpfs -o uid=0,gid=0,mode=0755 cgroup "$CGROUP" || {
            echo "Could not make a tmpfs mount. Did you use --privileged?" >&2
            exit 1
        }

    if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security
    then
        # Best effort: a failure here is reported but is not fatal.
        mount -t securityfs none /sys/kernel/security || {
            echo "Could not mount /sys/kernel/security." >&2
            echo "AppArmor detection and --privileged mode might break." >&2
        }
    fi

    # Mount the cgroup hierarchies exactly as they are in the parent system.
    # TODO: Is this needed anymore? The Docker dind removed it.
    # Word splitting of the command substitution is intentional: it yields one
    # controller list per hierarchy and silently drops empty fields, so
    # entries without a controller list do not produce an iteration.
    # shellcheck disable=SC2013
    for SUBSYS in $(cut -d: -f2 /proc/1/cgroup)
    do
        [ -d "$CGROUP/$SUBSYS" ] || mkdir "$CGROUP/$SUBSYS"
        mountpoint -q "$CGROUP/$SUBSYS" ||
            mount -n -t cgroup -o "$SUBSYS" cgroup "$CGROUP/$SUBSYS"

        # The two following sections address a bug which manifests itself
        # by a cryptic "lxc-start: no ns_cgroup option specified" when
        # trying to start containers within a container.
        # The bug seems to appear when the cgroup hierarchies are not
        # mounted on the exact same directories in the host, and in the
        # container.

        # Named, control-less cgroups are mounted with "-o name=foo"
        # (and appear as such under /proc/<pid>/cgroup) but are usually
        # mounted on a directory named "foo" (without the "name=" prefix).
        # Systemd and OpenRC (and possibly others) both create such a
        # cgroup. To avoid the aforementioned bug, we symlink "foo" to
        # "name=foo". This shouldn't have any adverse effect.
        # But this also tends to produce permission errors, so we drop
        # error output.
        case "$SUBSYS" in
            name=*)
                NAME=${SUBSYS#name=}
                ln -s "$SUBSYS" "$CGROUP/$NAME" 2>/dev/null
                ;;
        esac

        # Likewise, on at least one system, it has been reported that
        # systemd would mount the CPU and CPU accounting controllers
        # (respectively "cpu" and "cpuacct") with "-o cpuacct,cpu"
        # but on a directory called "cpu,cpuacct" (note the inversion
        # in the order of the groups). This tries to work around it.
        if [ "$SUBSYS" = cpuacct,cpu ]
        then
            ln -s "$SUBSYS" "$CGROUP/cpu,cpuacct" 2>/dev/null
        fi
    done

    # Note: as I write those lines, the LXC userland tools cannot setup
    # a "sub-container" properly if the "devices" cgroup is not in its
    # own hierarchy. Let's detect this and issue a warning.
    grep -q :devices: /proc/1/cgroup ||
        echo "WARNING: the 'devices' cgroup should be in its own hierarchy." >&2
    grep -qw devices /proc/1/cgroup ||
        echo "WARNING: it looks like the 'devices' cgroup is not mounted." >&2

    # Now, close extraneous file descriptors. Only do so if we really are in
    # /proc/self/fd; otherwise the glob below would expand to the names of
    # whatever files live in the current directory.
    if pushd /proc/self/fd >/dev/null
    then
        for FD in *
        do
            case "$FD" in
                # Keep stdin/stdout/stderr
                [012])
                    ;;
                # Never feed anything that is not a plain number to eval
                *[!0-9]*)
                    ;;
                # Nuke everything else. eval is needed because the descriptor
                # number in a redirection has to be a literal.
                *)
                    eval exec "$FD>&-"
                    ;;
            esac
        done
        popd >/dev/null
    fi

    # If a pidfile is still around (for example after a container restart),
    # delete it so that docker can start.
    rm -rf /var/run/docker.pid

    # cgroups v2 only (cgroup.controllers exists), and only when this shell is
    # itself listed in the visible root cgroup. The existence test runs first
    # so that grep does not complain about a missing file on cgroups v1.
    if [ -e /sys/fs/cgroup/cgroup.controllers ] && grep -qx "$$" /sys/fs/cgroup/cgroup.procs
    then
        # We need to move to a child cgroup. See
        # https://github.com/moby/moby/blob/ed89041433a031cafc0a0f19cfe573c31688d377/hack/dind#L28-L37
        # TODO: when containers change to get set up in a way that lets
        # Docker just make its own v2 cgroup properly, stop trying to do
        # this. See
        # <https://github.com/docker/for-mac/issues/6288#issuecomment-1250799498>
        echo "Moving everybody to a child cgroup. This may not work if we can't escape our cgroup!"
        set -x

        # The root cgroup we start in in a container might not actually have
        # any controllers on yet, but we still need to move.
        ACTIVE_CONTROLLERS="$(cat /sys/fs/cgroup/cgroup.subtree_control)"

        # Controllers to switch back on afterwards. A caller may override the
        # list through the environment; by default we restore exactly what
        # was enabled before.
        : "${WANTED_CONTROLLERS:=${ACTIVE_CONTROLLERS}}"

        # Prepend plusses and minuses: everything currently enabled has to be
        # disabled, and the wanted set is what gets re-enabled.
        CONTROLLERS_OFF="$(echo "${ACTIVE_CONTROLLERS}" | sed 's/\(^\| \)\([^ ]\)/\1-\2/g')"
        CONTROLLERS_ON="$(echo "${WANTED_CONTROLLERS}" | sed 's/\(^\| \)\([^ ]\)/\1+\2/g')"

        # Turn off our subtree's controllers
        echo "${CONTROLLERS_OFF}" > /sys/fs/cgroup/cgroup.subtree_control

        # Make a new cgroup under this one. Hope it isn't used yet.
        mkdir -p /sys/fs/cgroup/init

        # Since the controllers are all off we can have child processes in
        # a child cgroup, so move everybody one at a time and hope nobody
        # is forking. Each process needs to move in its own write() call,
        # hence one echo per PID.
        # NOTE(review): the cat is kept on purpose; it presumably reads the
        # PID list up front, before xargs starts moving processes.
        # shellcheck disable=SC2002
        cat /sys/fs/cgroup/cgroup.procs | xargs -rn 1 echo >/sys/fs/cgroup/init/cgroup.procs

        # Note that we still see the same cgroup hierarchy root even though
        # we should now be in a child cgroup of where we were before.
        # TODO: We will get "echo: write error: No such process" from this
        # sometimes. Why? And can we drop whatever that is from the list in
        # advance?

        # Now turn the controllers back on again
        echo "${CONTROLLERS_ON}" > /sys/fs/cgroup/cgroup.subtree_control
        set +x
    fi

    # If we were given a PORT environment variable, start Docker in the
    # foreground. Otherwise, start it in the background.
    # DOCKER_DAEMON_ARGS is deliberately left unquoted below: it is a
    # whitespace-separated list of extra daemon options.
    if [ "$PORT" ]
    then
        # shellcheck disable=SC2086
        exec dockerd -H "0.0.0.0:$PORT" -H unix:///var/run/docker.sock \
            $DOCKER_DAEMON_ARGS
    elif [ "$LOG" = "file" ]
    then
        # The subshell detaches the daemon from this script's job table.
        # shellcheck disable=SC2086
        (dockerd $DOCKER_DAEMON_ARGS &>/var/log/docker.log &)
    else
        # shellcheck disable=SC2086
        (dockerd $DOCKER_DAEMON_ARGS &)
    fi
fi
# Reaching this point means the daemon was not exec'd in the foreground.
# Poll once per second until `docker info` succeeds and the daemon's pidfile
# exists, giving up (with a message on stderr) after 60 seconds.
give_up_at=$(( SECONDS + 60 ))
while ! docker info >/dev/null 2>&1 || [ ! -e /var/run/docker.pid ]
do
    if [ "$SECONDS" -ge "$give_up_at" ]
    then
        echo 'Timed out trying to connect to internal docker host.' >&2
        break
    fi
    sleep 1
done