summaryrefslogtreecommitdiff
path: root/cloudinit/net
diff options
context:
space:
mode:
authorJason Zions <jasonzio@microsoft.com>2019-01-15 21:37:17 +0000
committerServer Team CI Bot <josh.powers+server-team-bot@canonical.com>2019-01-15 21:37:17 +0000
commitfdadcb5fae51f4e6799314ab98e3aec56c79b17c (patch)
tree318df8c54326032f8be340633b1642c642b202be /cloudinit/net
parentf19dc8fa62d4fd8de33311c3c75c5b6da440bebe (diff)
downloadvyos-cloud-init-fdadcb5fae51f4e6799314ab98e3aec56c79b17c.tar.gz
vyos-cloud-init-fdadcb5fae51f4e6799314ab98e3aec56c79b17c.zip
net: Wait for dhclient to daemonize before reading lease file
cloud-init uses dhclient to fetch the DHCP lease so it can extract DHCP options. dhclient creates the leasefile, then writes to it; simply waiting for the leasefile to appear creates a race between dhclient and cloud-init. Instead, wait for dhclient to be parented by init. At that point, we know it has written to the leasefile, so it's safe to copy the file and kill the process. cloud-init creates a temporary directory in which to execute dhclient, and deletes that directory after it has killed the process. If cloud-init abandons waiting for dhclient to daemonize, it will still attempt to delete the temporary directory, but will not report an exception should that attempt fail. LP: #1794399
Diffstat (limited to 'cloudinit/net')
-rw-r--r--cloudinit/net/dhcp.py44
-rw-r--r--cloudinit/net/tests/test_dhcp.py15
2 files changed, 43 insertions, 16 deletions
diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py
index 0db991db..c98a97cd 100644
--- a/cloudinit/net/dhcp.py
+++ b/cloudinit/net/dhcp.py
@@ -9,6 +9,7 @@ import logging
import os
import re
import signal
+import time
from cloudinit.net import (
EphemeralIPv4Network, find_fallback_nic, get_devicelist,
@@ -127,7 +128,9 @@ def maybe_perform_dhcp_discovery(nic=None):
if not dhclient_path:
LOG.debug('Skip dhclient configuration: No dhclient command found.')
return []
- with temp_utils.tempdir(prefix='cloud-init-dhcp-', needs_exe=True) as tdir:
+ with temp_utils.tempdir(rmtree_ignore_errors=True,
+ prefix='cloud-init-dhcp-',
+ needs_exe=True) as tdir:
# Use /var/tmp because /run/cloud-init/tmp is mounted noexec
return dhcp_discovery(dhclient_path, nic, tdir)
@@ -195,24 +198,39 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir):
'-pf', pid_file, interface, '-sf', '/bin/true']
util.subp(cmd, capture=True)
- # dhclient doesn't write a pid file until after it forks when it gets a
- # proper lease response. Since cleandir is a temp directory that gets
- # removed, we need to wait for that pidfile creation before the
- # cleandir is removed, otherwise we get FileNotFound errors.
+ # Wait for pid file and lease file to appear, and for the process
+ # named by the pid file to daemonize (have pid 1 as its parent). If we
+ # try to read the lease file before daemonization happens, we might try
+ # to read it before the dhclient has actually written it. We also have
+ # to wait until the dhclient has become a daemon so we can be sure to
+ # kill the correct process, thus freeing cleandir to be deleted back
+ # up the callstack.
missing = util.wait_for_files(
[pid_file, lease_file], maxwait=5, naplen=0.01)
if missing:
LOG.warning("dhclient did not produce expected files: %s",
', '.join(os.path.basename(f) for f in missing))
return []
- pid_content = util.load_file(pid_file).strip()
- try:
- pid = int(pid_content)
- except ValueError:
- LOG.debug(
- "pid file contains non-integer content '%s'", pid_content)
- else:
- os.kill(pid, signal.SIGKILL)
+
+ ppid = 'unknown'
+ for _ in range(0, 1000):
+ pid_content = util.load_file(pid_file).strip()
+ try:
+ pid = int(pid_content)
+ except ValueError:
+ pass
+ else:
+ ppid = util.get_proc_ppid(pid)
+ if ppid == 1:
+ LOG.debug('killing dhclient with pid=%s', pid)
+ os.kill(pid, signal.SIGKILL)
+ return parse_dhcp_lease_file(lease_file)
+ time.sleep(0.01)
+
+ LOG.error(
+ 'dhclient(pid=%s, parentpid=%s) failed to daemonize after %s seconds',
+ pid_content, ppid, 0.01 * 1000
+ )
return parse_dhcp_lease_file(lease_file)
diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py
index cd3e7328..79e8842f 100644
--- a/cloudinit/net/tests/test_dhcp.py
+++ b/cloudinit/net/tests/test_dhcp.py
@@ -145,16 +145,20 @@ class TestDHCPDiscoveryClean(CiTestCase):
'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}],
dhcp_discovery(dhclient_script, 'eth9', tmpdir))
self.assertIn(
- "pid file contains non-integer content ''", self.logs.getvalue())
+ "dhclient(pid=, parentpid=unknown) failed "
+ "to daemonize after 10.0 seconds",
+ self.logs.getvalue())
m_kill.assert_not_called()
+ @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid')
@mock.patch('cloudinit.net.dhcp.os.kill')
@mock.patch('cloudinit.net.dhcp.util.wait_for_files')
@mock.patch('cloudinit.net.dhcp.util.subp')
def test_dhcp_discovery_run_in_sandbox_waits_on_lease_and_pid(self,
m_subp,
m_wait,
- m_kill):
+ m_kill,
+ m_getppid):
"""dhcp_discovery waits for the presence of pidfile and dhcp.leases."""
tmpdir = self.tmp_dir()
dhclient_script = os.path.join(tmpdir, 'dhclient.orig')
@@ -164,6 +168,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
pidfile = self.tmp_path('dhclient.pid', tmpdir)
leasefile = self.tmp_path('dhcp.leases', tmpdir)
m_wait.return_value = [pidfile] # Return the missing pidfile wait for
+ m_getppid.return_value = 1 # Indicate that dhclient has daemonized
self.assertEqual([], dhcp_discovery(dhclient_script, 'eth9', tmpdir))
self.assertEqual(
mock.call([pidfile, leasefile], maxwait=5, naplen=0.01),
@@ -173,9 +178,10 @@ class TestDHCPDiscoveryClean(CiTestCase):
self.logs.getvalue())
m_kill.assert_not_called()
+ @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid')
@mock.patch('cloudinit.net.dhcp.os.kill')
@mock.patch('cloudinit.net.dhcp.util.subp')
- def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill):
+ def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill, m_getppid):
"""dhcp_discovery brings up the interface and runs dhclient.
It also returns the parsed dhcp.leases file generated in the sandbox.
@@ -197,6 +203,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
pid_file = os.path.join(tmpdir, 'dhclient.pid')
my_pid = 1
write_file(pid_file, "%d\n" % my_pid)
+ m_getppid.return_value = 1 # Indicate that dhclient has daemonized
self.assertItemsEqual(
[{'interface': 'eth9', 'fixed-address': '192.168.2.74',
@@ -355,3 +362,5 @@ class TestEphemeralDhcpNoNetworkSetup(HttprettyTestCase):
self.assertEqual(fake_lease, lease)
# Ensure that dhcp discovery occurs
m_dhcp.called_once_with()
+
+# vi: ts=4 expandtab