diff options
-rw-r--r-- | cloudinit/net/dhcp.py | 44 | ||||
-rw-r--r-- | cloudinit/net/tests/test_dhcp.py | 15 | ||||
-rw-r--r-- | cloudinit/temp_utils.py | 4 | ||||
-rw-r--r-- | cloudinit/tests/test_temp_utils.py | 18 | ||||
-rw-r--r-- | cloudinit/util.py | 16 | ||||
-rw-r--r-- | tests/unittests/test_util.py | 6 |
6 files changed, 84 insertions, 19 deletions
diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py index 0db991db..c98a97cd 100644 --- a/cloudinit/net/dhcp.py +++ b/cloudinit/net/dhcp.py @@ -9,6 +9,7 @@ import logging import os import re import signal +import time from cloudinit.net import ( EphemeralIPv4Network, find_fallback_nic, get_devicelist, @@ -127,7 +128,9 @@ def maybe_perform_dhcp_discovery(nic=None): if not dhclient_path: LOG.debug('Skip dhclient configuration: No dhclient command found.') return [] - with temp_utils.tempdir(prefix='cloud-init-dhcp-', needs_exe=True) as tdir: + with temp_utils.tempdir(rmtree_ignore_errors=True, + prefix='cloud-init-dhcp-', + needs_exe=True) as tdir: # Use /var/tmp because /run/cloud-init/tmp is mounted noexec return dhcp_discovery(dhclient_path, nic, tdir) @@ -195,24 +198,39 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir): '-pf', pid_file, interface, '-sf', '/bin/true'] util.subp(cmd, capture=True) - # dhclient doesn't write a pid file until after it forks when it gets a - # proper lease response. Since cleandir is a temp directory that gets - # removed, we need to wait for that pidfile creation before the - # cleandir is removed, otherwise we get FileNotFound errors. + # Wait for pid file and lease file to appear, and for the process + # named by the pid file to daemonize (have pid 1 as its parent). If we + # try to read the lease file before daemonization happens, we might try + # to read it before the dhclient has actually written it. We also have + # to wait until the dhclient has become a daemon so we can be sure to + # kill the correct process, thus freeing cleandir to be deleted back + # up the callstack. missing = util.wait_for_files( [pid_file, lease_file], maxwait=5, naplen=0.01) if missing: LOG.warning("dhclient did not produce expected files: %s", ', '.join(os.path.basename(f) for f in missing)) return [] - pid_content = util.load_file(pid_file).strip() - try: - pid = int(pid_content) - except ValueError: - LOG.debug( - "pid file contains non-integer content '%s'", pid_content) - else: - os.kill(pid, signal.SIGKILL) + + ppid = 'unknown' + for _ in range(0, 1000): + pid_content = util.load_file(pid_file).strip() + try: + pid = int(pid_content) + except ValueError: + pass + else: + ppid = util.get_proc_ppid(pid) + if ppid == 1: + LOG.debug('killing dhclient with pid=%s', pid) + os.kill(pid, signal.SIGKILL) + return parse_dhcp_lease_file(lease_file) + time.sleep(0.01) + + LOG.error( + 'dhclient(pid=%s, parentpid=%s) failed to daemonize after %s seconds', + pid_content, ppid, 0.01 * 1000 + ) return parse_dhcp_lease_file(lease_file) diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py index cd3e7328..79e8842f 100644 --- a/cloudinit/net/tests/test_dhcp.py +++ b/cloudinit/net/tests/test_dhcp.py @@ -145,16 +145,20 @@ class TestDHCPDiscoveryClean(CiTestCase): 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}], dhcp_discovery(dhclient_script, 'eth9', tmpdir)) self.assertIn( - "pid file contains non-integer content ''", self.logs.getvalue()) + "dhclient(pid=, parentpid=unknown) failed " + "to daemonize after 10.0 seconds", + self.logs.getvalue()) m_kill.assert_not_called() + @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid') @mock.patch('cloudinit.net.dhcp.os.kill') @mock.patch('cloudinit.net.dhcp.util.wait_for_files') @mock.patch('cloudinit.net.dhcp.util.subp') def test_dhcp_discovery_run_in_sandbox_waits_on_lease_and_pid(self, m_subp, m_wait, - m_kill): + m_kill, + m_getppid): """dhcp_discovery waits for the presence of pidfile and dhcp.leases.""" tmpdir = self.tmp_dir() dhclient_script = os.path.join(tmpdir, 'dhclient.orig') @@ -164,6 +168,7 @@ class TestDHCPDiscoveryClean(CiTestCase): pidfile = self.tmp_path('dhclient.pid', tmpdir) leasefile = self.tmp_path('dhcp.leases', tmpdir) m_wait.return_value = [pidfile] # Return the missing pidfile wait for + m_getppid.return_value = 1 # Indicate that dhclient has daemonized self.assertEqual([], dhcp_discovery(dhclient_script, 'eth9', tmpdir)) self.assertEqual( mock.call([pidfile, leasefile], maxwait=5, naplen=0.01), @@ -173,9 +178,10 @@ class TestDHCPDiscoveryClean(CiTestCase): self.logs.getvalue()) m_kill.assert_not_called() + @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid') @mock.patch('cloudinit.net.dhcp.os.kill') @mock.patch('cloudinit.net.dhcp.util.subp') - def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill): + def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill, m_getppid): """dhcp_discovery brings up the interface and runs dhclient. It also returns the parsed dhcp.leases file generated in the sandbox. @@ -197,6 +203,7 @@ class TestDHCPDiscoveryClean(CiTestCase): pid_file = os.path.join(tmpdir, 'dhclient.pid') my_pid = 1 write_file(pid_file, "%d\n" % my_pid) + m_getppid.return_value = 1 # Indicate that dhclient has daemonized self.assertItemsEqual( [{'interface': 'eth9', 'fixed-address': '192.168.2.74', @@ -355,3 +362,5 @@ class TestEphemeralDhcpNoNetworkSetup(HttprettyTestCase): self.assertEqual(fake_lease, lease) # Ensure that dhcp discovery occurs m_dhcp.called_once_with() + +# vi: ts=4 expandtab diff --git a/cloudinit/temp_utils.py b/cloudinit/temp_utils.py index c98a1b53..346276ec 100644 --- a/cloudinit/temp_utils.py +++ b/cloudinit/temp_utils.py @@ -81,7 +81,7 @@ def ExtendedTemporaryFile(**kwargs): @contextlib.contextmanager -def tempdir(**kwargs): +def tempdir(rmtree_ignore_errors=False, **kwargs): # This seems like it was only added in python 3.2 # Make it since its useful... # See: http://bugs.python.org/file12970/tempdir.patch @@ -89,7 +89,7 @@ def tempdir(**kwargs): try: yield tdir finally: - shutil.rmtree(tdir) + shutil.rmtree(tdir, ignore_errors=rmtree_ignore_errors) def mkdtemp(**kwargs): diff --git a/cloudinit/tests/test_temp_utils.py b/cloudinit/tests/test_temp_utils.py index ffbb92cd..4a52ef89 100644 --- a/cloudinit/tests/test_temp_utils.py +++ b/cloudinit/tests/test_temp_utils.py @@ -2,8 +2,9 @@ """Tests for cloudinit.temp_utils""" -from cloudinit.temp_utils import mkdtemp, mkstemp +from cloudinit.temp_utils import mkdtemp, mkstemp, tempdir from cloudinit.tests.helpers import CiTestCase, wrap_and_call +import os class TestTempUtils(CiTestCase): @@ -98,4 +99,19 @@ class TestTempUtils(CiTestCase): self.assertEqual('/fake/return/path', retval) self.assertEqual([{'dir': '/run/cloud-init/tmp'}], calls) + def test_tempdir_error_suppression(self): + """test tempdir suppresses errors during directory removal.""" + + with self.assertRaises(OSError): + with tempdir(prefix='cloud-init-dhcp-') as tdir: + os.rmdir(tdir) + # As a result, the directory is already gone, + # so shutil.rmtree should raise OSError + + with tempdir(rmtree_ignore_errors=True, + prefix='cloud-init-dhcp-') as tdir: + os.rmdir(tdir) + # Since the directory is already gone, shutil.rmtree would raise + # OSError, but we suppress that + # vi: ts=4 expandtab diff --git a/cloudinit/util.py b/cloudinit/util.py index 7800f7bc..a8a232b6 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -2876,4 +2876,20 @@ def udevadm_settle(exists=None, timeout=None): return subp(settle_cmd) +def get_proc_ppid(pid): + """ + Return the parent pid of a process. + """ + ppid = 0 + try: + contents = load_file("/proc/%s/stat" % pid, quiet=True) + except IOError as e: + LOG.warning('Failed to load /proc/%s/stat. %s', pid, e) + if contents: + parts = contents.split(" ", 4) + # man proc says + # ppid %d (4) The PID of the parent. + ppid = int(parts[3]) + return ppid + # vi: ts=4 expandtab diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py index 5a14479a..0e71db82 100644 --- a/tests/unittests/test_util.py +++ b/tests/unittests/test_util.py @@ -1171,4 +1171,10 @@ class TestGetProcEnv(helpers.TestCase): self.assertEqual({}, util.get_proc_env(1)) self.assertEqual(1, m_load_file.call_count) + def test_get_proc_ppid(self): + """get_proc_ppid returns correct parent pid value.""" + my_pid = os.getpid() + my_ppid = os.getppid() + self.assertEqual(my_ppid, util.get_proc_ppid(my_pid)) + # vi: ts=4 expandtab |