From 26c1cf9eeeb39e9bfcc1a37743679e5c76ab97b0 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 11 Sep 2014 10:50:04 -0400 Subject: cloud-init-local depends on /run. reflect that in upstart job. With the writing of cloud-init status, cloud-init-local needs to have /run mounted. The issue we were seeing was a race where: cloud-init-local creates /run/cloud-init; /run is mounted; cloud-init-local tries to link a file into /run/cloud-init; that directory was no longer visible, as /run was mounted over the top. LP: #1353008 --- upstart/cloud-init-local.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'upstart') diff --git a/upstart/cloud-init-local.conf b/upstart/cloud-init-local.conf index 061fe406..713b36d9 100644 --- a/upstart/cloud-init-local.conf +++ b/upstart/cloud-init-local.conf @@ -1,6 +1,6 @@ # cloud-init - the initial cloud-init job # crawls metadata service, emits cloud-config -start on mounted MOUNTPOINT=/ +start on mounted MOUNTPOINT=/ and mounted MOUNTPOINT=/run task -- cgit v1.2.3 From b09b12bc0130b6787bfee0332addd055459b9629 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Fri, 12 Sep 2014 13:03:44 -0400 Subject: upstart: add blocknet which blocks net until after cloud-init-local This makes it so networking won't start to come up until after cloud-init-local has had a chance to search local datasources and set /etc/network/interfaces. The changes most likely need to still be done for systemd. 
LP: #1368861 --- upstart/cloud-init-blocknet.conf | 83 ++++++++++++++++++++++++++++++++++++++++ upstart/cloud-init-local.conf | 9 ++++- upstart/cloud-init-nonet.conf | 5 +-- 3 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 upstart/cloud-init-blocknet.conf (limited to 'upstart') diff --git a/upstart/cloud-init-blocknet.conf b/upstart/cloud-init-blocknet.conf new file mode 100644 index 00000000..82d5ffbc --- /dev/null +++ b/upstart/cloud-init-blocknet.conf @@ -0,0 +1,83 @@ +# cloud-init-blocknet +# the purpose of this job is +# * to block networking from coming up until cloud-init-nonet has run +# * timeout if they all do not come up in a reasonable amount of time +description "block networking until cloud-init-local" +start on (starting network-interface + or starting network-manager + or starting networking) +stop on stopped cloud-init-local + +instance $JOB${INTERFACE:+/}${INTERFACE:-} +export INTERFACE +task + +script + set +e # you cannot trap TERM reliably with 'set -e' + SLEEP_CHILD="" + LOG="/run/${UPSTART_JOB}.log" + + static_network_up() { + local emitted="/run/network/static-network-up-emitted" + # /run/network/static-network-up-emitted is written by + # upstart (via /etc/network/if-up.d/upstart). its presense would + # indicate that static-network-up has already fired. + [ -e "$emitted" -o -e "/var/$emitted" ] + } + msg() { + local uptime="" idle="" msg="" + if [ -r /proc/uptime ]; then + read uptime idle < /proc/uptime + fi + msg="${UPSTART_INSTANCE}${uptime:+[${uptime}]}: $*" + echo "$msg" >> "$LOG" + echo "$msg" + } + + handle_sigterm() { + # if we received sigterm and static networking is up then it probably + # came from upstart as a result of 'stop on static-network-up' + msg "got sigterm" + if [ -n "$SLEEP_CHILD" ]; then + if ! kill $SLEEP_CHILD 2>/dev/null; then + [ ! -d "/proc/$SLEEP_CHILD" ] || + msg "hm.. 
failed to kill sleep pid $SLEEP_CHILD" + fi + fi + msg "stopped" + exit 0 + } + + dowait() { + msg "blocking $1 seconds" + # all this 'exec -a' does is get me a nicely named process in 'ps' + # ie, 'sleep-block-network-interface.eth1' + if [ -x /bin/bash ]; then + bash -c 'exec -a sleep-block-$1 sleep $2' -- "$UPSTART_INSTANCE" "$1" & + else + sleep "$1" & + fi + SLEEP_CHILD=$! + msg "sleepchild=$SLEEP_CHILD" + wait $SLEEP_CHILD + SLEEP_CHILD="" + } + + trap handle_sigterm TERM + + if [ -n "$INTERFACE" -a "${INTERFACE#lo}" != "${INTERFACE}" ]; then + msg "ignoring interface ${INTERFACE}"; + exit 0; + fi + + # static_network_up already occurred + static_network_up && { msg "static_network_up already"; exit 0; } + + # local-finished cloud-init-local success or failure + lfin="/run/cloud-init/local-finished" + [ -f "$lfin" ] && { msg "$lfin found"; exit 0; } + + dowait 120 + msg "gave up waiting for $lfin" + exit 1 +end script diff --git a/upstart/cloud-init-local.conf b/upstart/cloud-init-local.conf index 713b36d9..5def043d 100644 --- a/upstart/cloud-init-local.conf +++ b/upstart/cloud-init-local.conf @@ -6,4 +6,11 @@ task console output -exec /usr/bin/cloud-init init --local +script + lfin=/run/cloud-init/local-finished + ret=0 + cloud-init init --local || ret=$? 
+ [ -r /proc/uptime ] && read up idle < /proc/uptime || up="N/A" + echo "$ret up $up" > "$lfin" + exit $ret +end script diff --git a/upstart/cloud-init-nonet.conf b/upstart/cloud-init-nonet.conf index e8ebee96..40059ad6 100644 --- a/upstart/cloud-init-nonet.conf +++ b/upstart/cloud-init-nonet.conf @@ -58,9 +58,8 @@ script # static_network_up already occurred static_network_up && exit 0 - # obj.pkl comes from cloud-init-local (or previous boot and - # manual_cache_clean) - [ -f /var/lib/cloud/instance/obj.pkl ] && exit 0 + # local-finished comes from cloud-init-local + [ -f /run/cloud-init/local-finished ] && exit 0 dowait 10 dowait 120 -- cgit v1.2.3 From 8eb2f6ea588775d3ddd55a62630cab310497190e Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Fri, 12 Sep 2014 15:38:56 -0400 Subject: allow local disabling of blocknet --- upstart/cloud-init-blocknet.conf | 2 ++ 1 file changed, 2 insertions(+) (limited to 'upstart') diff --git a/upstart/cloud-init-blocknet.conf b/upstart/cloud-init-blocknet.conf index 82d5ffbc..dd4f1798 100644 --- a/upstart/cloud-init-blocknet.conf +++ b/upstart/cloud-init-blocknet.conf @@ -75,7 +75,9 @@ script # local-finished cloud-init-local success or failure lfin="/run/cloud-init/local-finished" + disable="/etc/cloud/no-blocknet" [ -f "$lfin" ] && { msg "$lfin found"; exit 0; } + [ -f "$disable" ] && { msg "$disable found"; exit 0; } dowait 120 msg "gave up waiting for $lfin" -- cgit v1.2.3 From 41a52ef99a9dddd3fef9bdc883dfca55770ef2a5 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Fri, 12 Sep 2014 17:45:20 -0400 Subject: upstart/cloud-init-nonet.conf: do not exit on existence of obj.pkl not sure why this was here. the intent must have been to allow for a local datasource to continue booting and not annoyingly block waiting for network information (if there was no network information). However, that seems wrong. 
If the datasource wipes /etc/network/interfaces and there are no network interfaces then we're probably breaking that use case here. However we're fixing the other more common case. --- upstart/cloud-init-nonet.conf | 3 --- 1 file changed, 3 deletions(-) (limited to 'upstart') diff --git a/upstart/cloud-init-nonet.conf b/upstart/cloud-init-nonet.conf index 40059ad6..38630b39 100644 --- a/upstart/cloud-init-nonet.conf +++ b/upstart/cloud-init-nonet.conf @@ -58,9 +58,6 @@ script # static_network_up already occurred static_network_up && exit 0 - # local-finished comes from cloud-init-local - [ -f /run/cloud-init/local-finished ] && exit 0 - dowait 10 dowait 120 msg "gave up waiting for a network device." -- cgit v1.2.3 From f5be74dc84aac7bb9a634e5518f5aac6ae80098a Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Fri, 12 Sep 2014 17:59:52 -0400 Subject: upstart/cloud-init-nonet.conf: only mention wait if larger than 5 seconds silently wait 5 seconds for networking to come up. We started seeing the message more now, as we are now blocking the networking from coming up until cloud-init-local is done. Previously that would have happened in parallel, and we were less likely to see that message. --- upstart/cloud-init-nonet.conf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'upstart') diff --git a/upstart/cloud-init-nonet.conf b/upstart/cloud-init-nonet.conf index 38630b39..6abf6573 100644 --- a/upstart/cloud-init-nonet.conf +++ b/upstart/cloud-init-nonet.conf @@ -46,7 +46,7 @@ script } dowait() { - msg "waiting $1 seconds for network device" + [ $# -eq 2 ] || msg "waiting $1 seconds for network device" sleep "$1" & SLEEP_CHILD=$! wait $SLEEP_CHILD @@ -58,8 +58,9 @@ script # static_network_up already occurred static_network_up && exit 0 + dowait 5 silent dowait 10 - dowait 120 + dowait 115 msg "gave up waiting for a network device." 
: > /var/lib/cloud/data/no-net end script -- cgit v1.2.3 From 6093b8b2733814b9265494c47f4268167c9491ab Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Mon, 15 Sep 2014 09:40:29 -0400 Subject: cloud-init-blocknet: remove debug code going to /run/cloud-init-blocknet --- upstart/cloud-init-blocknet.conf | 2 -- 1 file changed, 2 deletions(-) (limited to 'upstart') diff --git a/upstart/cloud-init-blocknet.conf b/upstart/cloud-init-blocknet.conf index dd4f1798..be09e7d8 100644 --- a/upstart/cloud-init-blocknet.conf +++ b/upstart/cloud-init-blocknet.conf @@ -15,7 +15,6 @@ task script set +e # you cannot trap TERM reliably with 'set -e' SLEEP_CHILD="" - LOG="/run/${UPSTART_JOB}.log" static_network_up() { local emitted="/run/network/static-network-up-emitted" @@ -30,7 +29,6 @@ script read uptime idle < /proc/uptime fi msg="${UPSTART_INSTANCE}${uptime:+[${uptime}]}: $*" - echo "$msg" >> "$LOG" echo "$msg" } -- cgit v1.2.3