diff options
Diffstat (limited to 'cloudinit/net')
-rw-r--r-- | cloudinit/net/dhcp.py | 44 | ||||
-rw-r--r-- | cloudinit/net/tests/test_dhcp.py | 15 |
2 files changed, 43 insertions, 16 deletions
diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py index 0db991db..c98a97cd 100644 --- a/cloudinit/net/dhcp.py +++ b/cloudinit/net/dhcp.py @@ -9,6 +9,7 @@ import logging import os import re import signal +import time from cloudinit.net import ( EphemeralIPv4Network, find_fallback_nic, get_devicelist, @@ -127,7 +128,9 @@ def maybe_perform_dhcp_discovery(nic=None): if not dhclient_path: LOG.debug('Skip dhclient configuration: No dhclient command found.') return [] - with temp_utils.tempdir(prefix='cloud-init-dhcp-', needs_exe=True) as tdir: + with temp_utils.tempdir(rmtree_ignore_errors=True, + prefix='cloud-init-dhcp-', + needs_exe=True) as tdir: # Use /var/tmp because /run/cloud-init/tmp is mounted noexec return dhcp_discovery(dhclient_path, nic, tdir) @@ -195,24 +198,39 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir): '-pf', pid_file, interface, '-sf', '/bin/true'] util.subp(cmd, capture=True) - # dhclient doesn't write a pid file until after it forks when it gets a - # proper lease response. Since cleandir is a temp directory that gets - # removed, we need to wait for that pidfile creation before the - # cleandir is removed, otherwise we get FileNotFound errors. + # Wait for pid file and lease file to appear, and for the process + # named by the pid file to daemonize (have pid 1 as its parent). If we + # try to read the lease file before daemonization happens, we might try + # to read it before the dhclient has actually written it. We also have + # to wait until the dhclient has become a daemon so we can be sure to + # kill the correct process, thus freeing cleandir to be deleted back + # up the callstack. missing = util.wait_for_files( [pid_file, lease_file], maxwait=5, naplen=0.01) if missing: LOG.warning("dhclient did not produce expected files: %s", ', '.join(os.path.basename(f) for f in missing)) return [] - pid_content = util.load_file(pid_file).strip() - try: - pid = int(pid_content) - except ValueError: - LOG.debug( - "pid file contains non-integer content '%s'", pid_content) - else: - os.kill(pid, signal.SIGKILL) + + ppid = 'unknown' + for _ in range(0, 1000): + pid_content = util.load_file(pid_file).strip() + try: + pid = int(pid_content) + except ValueError: + pass + else: + ppid = util.get_proc_ppid(pid) + if ppid == 1: + LOG.debug('killing dhclient with pid=%s', pid) + os.kill(pid, signal.SIGKILL) + return parse_dhcp_lease_file(lease_file) + time.sleep(0.01) + + LOG.error( + 'dhclient(pid=%s, parentpid=%s) failed to daemonize after %s seconds', + pid_content, ppid, 0.01 * 1000 + ) return parse_dhcp_lease_file(lease_file) diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py index cd3e7328..79e8842f 100644 --- a/cloudinit/net/tests/test_dhcp.py +++ b/cloudinit/net/tests/test_dhcp.py @@ -145,16 +145,20 @@ class TestDHCPDiscoveryClean(CiTestCase): 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}], dhcp_discovery(dhclient_script, 'eth9', tmpdir)) self.assertIn( - "pid file contains non-integer content ''", self.logs.getvalue()) + "dhclient(pid=, parentpid=unknown) failed " + "to daemonize after 10.0 seconds", + self.logs.getvalue()) m_kill.assert_not_called() + @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid') @mock.patch('cloudinit.net.dhcp.os.kill') @mock.patch('cloudinit.net.dhcp.util.wait_for_files') @mock.patch('cloudinit.net.dhcp.util.subp') def test_dhcp_discovery_run_in_sandbox_waits_on_lease_and_pid(self, m_subp, m_wait, - m_kill): + m_kill, + m_getppid): """dhcp_discovery waits for the presence of pidfile and dhcp.leases.""" tmpdir = self.tmp_dir() dhclient_script = os.path.join(tmpdir, 'dhclient.orig') @@ -164,6 +168,7 @@ class TestDHCPDiscoveryClean(CiTestCase): pidfile = self.tmp_path('dhclient.pid', tmpdir) leasefile = self.tmp_path('dhcp.leases', tmpdir) m_wait.return_value = [pidfile] # Return the missing pidfile wait for + m_getppid.return_value = 1 # Indicate that dhclient has daemonized self.assertEqual([], dhcp_discovery(dhclient_script, 'eth9', tmpdir)) self.assertEqual( mock.call([pidfile, leasefile], maxwait=5, naplen=0.01), @@ -173,9 +178,10 @@ class TestDHCPDiscoveryClean(CiTestCase): self.logs.getvalue()) m_kill.assert_not_called() + @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid') @mock.patch('cloudinit.net.dhcp.os.kill') @mock.patch('cloudinit.net.dhcp.util.subp') - def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill): + def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill, m_getppid): """dhcp_discovery brings up the interface and runs dhclient. It also returns the parsed dhcp.leases file generated in the sandbox. @@ -197,6 +203,7 @@ class TestDHCPDiscoveryClean(CiTestCase): pid_file = os.path.join(tmpdir, 'dhclient.pid') my_pid = 1 write_file(pid_file, "%d\n" % my_pid) + m_getppid.return_value = 1 # Indicate that dhclient has daemonized self.assertItemsEqual( [{'interface': 'eth9', 'fixed-address': '192.168.2.74', @@ -355,3 +362,5 @@ class TestEphemeralDhcpNoNetworkSetup(HttprettyTestCase): self.assertEqual(fake_lease, lease) # Ensure that dhcp discovery occurs m_dhcp.called_once_with() + +# vi: ts=4 expandtab |