forked from nukedupe/docker-autoheal
-
Notifications
You must be signed in to change notification settings - Fork 2
/
docker-entrypoint
executable file
·166 lines (145 loc) · 4.85 KB
/
docker-entrypoint
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/env sh
# Entrypoint configuration: reads settings from the environment (falling back
# to the defaults below) and derives the HTTP endpoint plus the extra curl
# options needed to talk to the Docker API.
set -e
# shellcheck disable=2039
set -o pipefail

# Docker API location: a unix socket path, or a tcp:// / tcps:// URL.
DOCKER_SOCK=${DOCKER_SOCK:-/var/run/docker.sock}
UNIX_SOCK=""
# TLS options are filled in only for tcps:// endpoints. Start from empty so
# that unrelated environment variables that happen to be called CA,
# CLIENT_KEY or CLIENT_CERT are never injected into the curl command line.
CA=""
CLIENT_KEY=""
CLIENT_CERT=""
CURL_TIMEOUT=${CURL_TIMEOUT:-30}
WEBHOOK_URL=${WEBHOOK_URL:-""}
WEBHOOK_JSON_KEY=${WEBHOOK_JSON_KEY:-"text"}
APPRISE_URL=${APPRISE_URL:-""}

# only use unix domain socket if no TCP endpoint is defined
case "${DOCKER_SOCK}" in
  "tcp://"*)
    # Swap the scheme with a prefix strip instead of echo|sed, so the value
    # is never word-split or glob-expanded.
    HTTP_ENDPOINT="http://${DOCKER_SOCK#tcp://}"
    ;;
  "tcps://"*)
    HTTP_ENDPOINT="https://${DOCKER_SOCK#tcps://}"
    CA="--cacert /certs/ca.pem"
    CLIENT_KEY="--key /certs/client-key.pem"
    CLIENT_CERT="--cert /certs/client-cert.pem"
    ;;
  *)
    HTTP_ENDPOINT="http://localhost"
    UNIX_SOCK="--unix-socket ${DOCKER_SOCK}"
    ;;
esac

# Container selection and timing defaults.
AUTOHEAL_CONTAINER_LABEL=${AUTOHEAL_CONTAINER_LABEL:-autoheal}
AUTOHEAL_START_PERIOD=${AUTOHEAL_START_PERIOD:-0}
AUTOHEAL_INTERVAL=${AUTOHEAL_INTERVAL:-5}
AUTOHEAL_DEFAULT_STOP_TIMEOUT=${AUTOHEAL_DEFAULT_STOP_TIMEOUT:-10}
# Run curl against the Docker API with the shared transport options.
# Globals:   CURL_TIMEOUT, CA, CLIENT_KEY, CLIENT_CERT, UNIX_SOCK (read)
# Arguments: $@ - additional curl arguments (options and URL)
# Returns:   exit status of curl
docker_curl() {
  # The option variables each hold "--flag value" (or are empty) and are left
  # unquoted on purpose so that they split into separate curl arguments.
  # shellcheck disable=SC2086
  set -- ${CA} ${CLIENT_KEY} ${CLIENT_CERT} ${UNIX_SOCK} "$@"
  curl --max-time "${CURL_TIMEOUT}" --no-buffer -s "$@"
}
# Query the Docker API for unhealthy containers and print the raw response.
# Globals:   AUTOHEAL_CONTAINER_LABEL, HTTP_ENDPOINT (read)
# Outputs:   whatever docker_curl prints for GET /containers/json
# Returns:   exit status of docker_curl
# shellcheck disable=2039
get_container_info() {
  local selector=""
  local query

  # Unless every container is monitored ("all"), also filter on <label>=true.
  if [ "$AUTOHEAL_CONTAINER_LABEL" != "all" ]; then
    selector=',"label":\["'"${AUTOHEAL_CONTAINER_LABEL}"'=true"\]'
  fi

  # The backslashes before { } [ ] are passed to curl literally.
  query='filters=\{"health":\["unhealthy"\]'"${selector}"'\}'
  docker_curl "${HTTP_ENDPOINT}/containers/json?${query}"
}
# Ask the Docker API to restart a container.
# Globals:   HTTP_ENDPOINT (read)
# Arguments: $1 - container id
#            $2 - value for the restart request's "t" query parameter
# Returns:   exit status of docker_curl (curl runs with -f)
# shellcheck disable=2039
restart_container() {
  local id
  local stop_after
  local endpoint
  id="$1"
  stop_after="$2"
  endpoint="${HTTP_ENDPOINT}/containers/${id}/restart?t=${stop_after}"
  docker_curl -f -X POST "$endpoint"
}
# Send a notification message to the configured webhook and/or Apprise URL.
# Globals:   WEBHOOK_URL, APPRISE_URL (read) - a target is skipped when empty
# Arguments: $@ - message words; joined with single spaces into one string
# Outputs:   curl's response on stdout; a diagnostic on stderr if a POST fails
# The requests run synchronously here; callers that must not block should
# invoke this function in the background.
# shellcheck disable=2039
notify_webhook() {
  local text
  # "$*" joins the arguments into one string; "$@" in an assignment is not
  # portable across sh implementations.
  text="$*"
  if [ -n "$WEBHOOK_URL" ]; then
    # The message is passed quoted so whitespace and glob characters survive.
    # A failed POST is reported but must not abort the function under
    # `set -e`, otherwise the Apprise notification below would be skipped.
    curl -s -X POST -H "Content-type: application/json" \
      -d "$(generate_webhook_payload "$text")" "$WEBHOOK_URL" \
      || echo "Webhook notification failed" >&2
  fi
  if [ -n "$APPRISE_URL" ]; then
    curl -s -X POST -H "Content-type: application/json" \
      -d "$(generate_apprise_payload "$text")" "$APPRISE_URL" \
      || echo "Apprise notification failed" >&2
  fi
}
# Launch the optional post-restart hook without waiting for it.
# Globals:   POST_RESTART_SCRIPT (read) - command to run; no-op when empty
# Arguments: $@ - passed through to the hook unchanged
notify_post_restart_script() {
  # Nothing to do unless a hook has been configured.
  [ -n "$POST_RESTART_SCRIPT" ] || return 0

  # execute post restart script as background process to prevent healer from blocking
  # (the variable is expanded unquoted, so it is split into command and arguments)
  $POST_RESTART_SCRIPT "$@" &
}
# https://towardsdatascience.com/proper-ways-to-pass-environment-variables-in-json-for-curl-post-f797d2698bf3
# Print the JSON body for the generic webhook to stdout.
# Globals:   WEBHOOK_JSON_KEY (read) - name of the JSON member carrying the message
# Arguments: $@ - message words; joined with single spaces into one string
# Outputs:   a one-member JSON object
# shellcheck disable=2039
generate_webhook_payload() {
  local text
  local escaped
  # "$*" joins the arguments into one string; "$@" in an assignment is not
  # portable across sh implementations.
  text="$*"
  # Escape backslashes first, then double quotes, so the message cannot end
  # the JSON string early. Control characters are not handled.
  escaped=$(printf '%s' "$text" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')
  cat <<EOF
{
"$WEBHOOK_JSON_KEY":"$escaped"
}
EOF
}
# Print the JSON body for an Apprise notification to stdout.
# Arguments: $@ - message words; joined with single spaces into one string
# Outputs:   a JSON object with a fixed "title" of "Autoheal" and the message as "body"
# shellcheck disable=2039
generate_apprise_payload() {
  local text
  local escaped
  # "$*" joins the arguments into one string; "$@" in an assignment is not
  # portable across sh implementations.
  text="$*"
  # Escape backslashes first, then double quotes, so the message cannot end
  # the JSON string early. Control characters are not handled.
  escaped=$(printf '%s' "$text" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')
  cat <<EOF
{
"title":"Autoheal",
"body":"$escaped"
}
EOF
}
# SIGTERM handler: terminate the script with the status that denotes
# "ended by SIGTERM".
term_handler() {
  # 128 + 15 (SIGTERM) = 143
  exit "$((128 + 15))"
}
# Install the SIGTERM trap: when the shell receives SIGTERM it first runs
# `kill $$` (signalling its own PID with kill's default signal) and then
# calls term_handler.
# NOTE(review): the purpose of the self-directed kill is not evident from
# this file - confirm before changing it.
# shellcheck disable=2039
trap 'kill $$; term_handler' SIGTERM
# Entry point: with "autoheal" as first argument run the monitoring loop,
# otherwise hand control over to the given command line.
if [ "$1" = "autoheal" ]; then
  # When the unix-socket transport is selected, refuse to start unless the
  # socket file is present.
  if [ -n "$UNIX_SOCK" ]; then
    if [ ! -S "$DOCKER_SOCK" ]; then
      echo "unix socket is currently not available" >&2
      exit 1
    fi
  fi

  # Delayed startup
  if [ "$AUTOHEAL_START_PERIOD" -gt 0 ]; then
    echo "Monitoring containers for unhealthy status in $AUTOHEAL_START_PERIOD second(s)"
    # Sleep as a background job and wait on it (presumably so the SIGTERM
    # trap is not held up by a foreground sleep).
    sleep "$AUTOHEAL_START_PERIOD" &
    wait $!
  fi

  # jq expression yielding the per-container stop timeout label, or the
  # configured default when the label is absent.
  STOP_TIMEOUT=".Labels[\"autoheal.stop.timeout\"] // $AUTOHEAL_DEFAULT_STOP_TIMEOUT"

  while true; do
    # jq prints four lines per container (id, first name, state, timeout);
    # the loop consumes them in groups of four.
    get_container_info |
      jq -r ".[] | select(.Labels[\"autoheal\"] != \"False\") | foreach . as \$CONTAINER([];[]; \$CONTAINER | .Id, .Names[0], .State, ${STOP_TIMEOUT})" |
      while read -r CONTAINER_ID && read -r CONTAINER_NAME && read -r CONTAINER_STATE && read -r TIMEOUT; do
        # shellcheck disable=2039
        CONTAINER_SHORT_ID=${CONTAINER_ID:0:12}
        DATE=$(date "+%d-%m-%Y %H:%M:%S")

        if [ "$CONTAINER_NAME" = "null" ]; then
          echo "$DATE Container name of (${CONTAINER_SHORT_ID}) is null, which implies container does not exist - don't restart" >&2
          continue
        fi

        if [ "$CONTAINER_STATE" = "restarting" ]; then
          echo "$DATE Container $CONTAINER_NAME (${CONTAINER_SHORT_ID}) found to be restarting - don't restart"
          continue
        fi

        echo "$DATE Container $CONTAINER_NAME (${CONTAINER_SHORT_ID}) found to be unhealthy - Restarting container now with ${TIMEOUT}s timeout"

        # Shared start of both notification texts; ${CONTAINER_NAME:1} drops
        # the first character of the name.
        # shellcheck disable=2039
        outcome_prefix="Container ${CONTAINER_NAME:1} (${CONTAINER_SHORT_ID}) found to be unhealthy."
        if restart_container "$CONTAINER_ID" "$TIMEOUT"; then
          notify_webhook "${outcome_prefix} Successfully restarted the container!" &
        else
          echo "$DATE Restarting container $CONTAINER_SHORT_ID failed" >&2
          notify_webhook "${outcome_prefix} Failed to restart the container!" &
        fi

        # The post-restart hook runs after every restart attempt, whether or
        # not the restart succeeded.
        notify_post_restart_script "$CONTAINER_NAME" "$CONTAINER_SHORT_ID" "$CONTAINER_STATE" "$TIMEOUT" &
      done

    # Pause between polling rounds, again as a background job plus wait.
    sleep "$AUTOHEAL_INTERVAL" &
    wait $!
  done
else
  exec "$@"
fi