diff options
author | Chad Smith <chad.smith@canonical.com> | 2017-11-30 13:09:30 -0700 |
---|---|---|
committer | Scott Moser <smoser@brickies.net> | 2017-11-30 21:51:14 -0500 |
commit | 7acc9e68fafbbd7c56587aebe752ba6ba8c8a3db (patch) | |
tree | 521cc4ebca1fb7b66f6e1dba97e09b4861bb72d7 /cloudinit/net | |
parent | 88368f9851b29dddb5a12e4b21868cbdef906c5c (diff) | |
download | vyos-cloud-init-7acc9e68fafbbd7c56587aebe752ba6ba8c8a3db.tar.gz vyos-cloud-init-7acc9e68fafbbd7c56587aebe752ba6ba8c8a3db.zip |
ec2: Fix sandboxed dhclient background process cleanup.
There is a race condition where our sandboxed dhclient properly writes a
lease file but has not yet written a pid file. If the sandbox temporary
directory is torn down before the dhclient subprocess writes a pidfile
DataSourceEc2Local gets a traceback and the instance will fallback to
DataSourceEc2 in the init-network stage. This wastes boot cycles we'd
rather not spend.
Fix handling of sandboxed dhclient to wait for both pidfile and leasefile
before proceding. If either file doesn't show in 5 seconds, log a warning
and return empty lease results {}.
LP: #1735331
Diffstat (limited to 'cloudinit/net')
-rw-r--r-- | cloudinit/net/dhcp.py | 44 | ||||
-rw-r--r-- | cloudinit/net/tests/test_dhcp.py | 66 |
2 files changed, 90 insertions, 20 deletions
diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py index d8624d82..875a4609 100644 --- a/cloudinit/net/dhcp.py +++ b/cloudinit/net/dhcp.py @@ -36,22 +36,23 @@ def maybe_perform_dhcp_discovery(nic=None): skip dhcp_discovery and return an empty dict. @param nic: Name of the network interface we want to run dhclient on. - @return: A dict of dhcp options from the dhclient discovery if run, - otherwise an empty dict is returned. + @return: A list of dicts representing dhcp options for each lease obtained + from the dhclient discovery if run, otherwise an empty list is + returned. """ if nic is None: nic = find_fallback_nic() if nic is None: LOG.debug('Skip dhcp_discovery: Unable to find fallback nic.') - return {} + return [] elif nic not in get_devicelist(): LOG.debug( 'Skip dhcp_discovery: nic %s not found in get_devicelist.', nic) - return {} + return [] dhclient_path = util.which('dhclient') if not dhclient_path: LOG.debug('Skip dhclient configuration: No dhclient command found.') - return {} + return [] with temp_utils.tempdir(prefix='cloud-init-dhcp-', needs_exe=True) as tdir: # Use /var/tmp because /run/cloud-init/tmp is mounted noexec return dhcp_discovery(dhclient_path, nic, tdir) @@ -60,8 +61,8 @@ def maybe_perform_dhcp_discovery(nic=None): def parse_dhcp_lease_file(lease_file): """Parse the given dhcp lease file for the most recent lease. - Return a dict of dhcp options as key value pairs for the most recent lease - block. + Return a list of dicts of dhcp options. Each dict contains key value pairs + a specific lease in order from oldest to newest. @raises: InvalidDHCPLeaseFileError on empty of unparseable leasefile content. @@ -96,8 +97,8 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir): @param cleandir: The directory from which to run dhclient as well as store dhcp leases. - @return: A dict of dhcp options parsed from the dhcp.leases file or empty - dict. + @return: A list of dicts of representing the dhcp leases parsed from the + dhcp.leases file or empty list. """ LOG.debug('Performing a dhcp discovery on %s', interface) @@ -119,13 +120,26 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir): cmd = [sandbox_dhclient_cmd, '-1', '-v', '-lf', lease_file, '-pf', pid_file, interface, '-sf', '/bin/true'] util.subp(cmd, capture=True) - pid = None + + # dhclient doesn't write a pid file until after it forks when it gets a + # proper lease response. Since cleandir is a temp directory that gets + # removed, we need to wait for that pidfile creation before the + # cleandir is removed, otherwise we get FileNotFound errors. + missing = util.wait_for_files( + [pid_file, lease_file], maxwait=5, naplen=0.01) + if missing: + LOG.warning("dhclient did not produce expected files: %s", + ', '.join(os.path.basename(f) for f in missing)) + return [] + pid_content = util.load_file(pid_file).strip() try: - pid = int(util.load_file(pid_file).strip()) - return parse_dhcp_lease_file(lease_file) - finally: - if pid: - os.kill(pid, signal.SIGKILL) + pid = int(pid_content) + except ValueError: + LOG.debug( + "pid file contains non-integer content '%s'", pid_content) + else: + os.kill(pid, signal.SIGKILL) + return parse_dhcp_lease_file(lease_file) def networkd_parse_lease(content): diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py index 3d8e15c0..db25b6f2 100644 --- a/cloudinit/net/tests/test_dhcp.py +++ b/cloudinit/net/tests/test_dhcp.py @@ -1,6 +1,5 @@ # This file is part of cloud-init. See LICENSE file for license information. -import mock import os import signal from textwrap import dedent @@ -9,7 +8,8 @@ from cloudinit.net.dhcp import ( InvalidDHCPLeaseFileError, maybe_perform_dhcp_discovery, parse_dhcp_lease_file, dhcp_discovery, networkd_load_leases) from cloudinit.util import ensure_file, write_file -from cloudinit.tests.helpers import CiTestCase, wrap_and_call, populate_dir +from cloudinit.tests.helpers import ( + CiTestCase, mock, populate_dir, wrap_and_call) class TestParseDHCPLeasesFile(CiTestCase): @@ -69,14 +69,14 @@ class TestDHCPDiscoveryClean(CiTestCase): def test_no_fallback_nic_found(self, m_fallback_nic): """Log and do nothing when nic is absent and no fallback is found.""" m_fallback_nic.return_value = None # No fallback nic found - self.assertEqual({}, maybe_perform_dhcp_discovery()) + self.assertEqual([], maybe_perform_dhcp_discovery()) self.assertIn( 'Skip dhcp_discovery: Unable to find fallback nic.', self.logs.getvalue()) def test_provided_nic_does_not_exist(self): """When the provided nic doesn't exist, log a message and no-op.""" - self.assertEqual({}, maybe_perform_dhcp_discovery('idontexist')) + self.assertEqual([], maybe_perform_dhcp_discovery('idontexist')) self.assertIn( 'Skip dhcp_discovery: nic idontexist not found in get_devicelist.', self.logs.getvalue()) @@ -87,7 +87,7 @@ class TestDHCPDiscoveryClean(CiTestCase): """When dhclient doesn't exist in the OS, log the issue and no-op.""" m_fallback.return_value = 'eth9' m_which.return_value = None # dhclient isn't found - self.assertEqual({}, maybe_perform_dhcp_discovery()) + self.assertEqual([], maybe_perform_dhcp_discovery()) self.assertIn( 'Skip dhclient configuration: No dhclient command found.', self.logs.getvalue()) @@ -117,6 +117,62 @@ class TestDHCPDiscoveryClean(CiTestCase): @mock.patch('cloudinit.net.dhcp.os.kill') @mock.patch('cloudinit.net.dhcp.util.subp') + def test_dhcp_discovery_run_in_sandbox_warns_invalid_pid(self, m_subp, + m_kill): + """dhcp_discovery logs a warning when pidfile contains invalid content. + + Lease processing still occurs and no proc kill is attempted. + """ + tmpdir = self.tmp_dir() + dhclient_script = os.path.join(tmpdir, 'dhclient.orig') + script_content = '#!/bin/bash\necho fake-dhclient' + write_file(dhclient_script, script_content, mode=0o755) + write_file(self.tmp_path('dhclient.pid', tmpdir), '') # Empty pid '' + lease_content = dedent(""" + lease { + interface "eth9"; + fixed-address 192.168.2.74; + option subnet-mask 255.255.255.0; + option routers 192.168.2.1; + } + """) + write_file(self.tmp_path('dhcp.leases', tmpdir), lease_content) + + self.assertItemsEqual( + [{'interface': 'eth9', 'fixed-address': '192.168.2.74', + 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}], + dhcp_discovery(dhclient_script, 'eth9', tmpdir)) + self.assertIn( + "pid file contains non-integer content ''", self.logs.getvalue()) + m_kill.assert_not_called() + + @mock.patch('cloudinit.net.dhcp.os.kill') + @mock.patch('cloudinit.net.dhcp.util.wait_for_files') + @mock.patch('cloudinit.net.dhcp.util.subp') + def test_dhcp_discovery_run_in_sandbox_waits_on_lease_and_pid(self, + m_subp, + m_wait, + m_kill): + """dhcp_discovery waits for the presence of pidfile and dhcp.leases.""" + tmpdir = self.tmp_dir() + dhclient_script = os.path.join(tmpdir, 'dhclient.orig') + script_content = '#!/bin/bash\necho fake-dhclient' + write_file(dhclient_script, script_content, mode=0o755) + # Don't create pid or leases file + pidfile = self.tmp_path('dhclient.pid', tmpdir) + leasefile = self.tmp_path('dhcp.leases', tmpdir) + m_wait.return_value = [pidfile] # Return the missing pidfile wait for + self.assertEqual([], dhcp_discovery(dhclient_script, 'eth9', tmpdir)) + self.assertEqual( + mock.call([pidfile, leasefile], maxwait=5, naplen=0.01), + m_wait.call_args_list[0]) + self.assertIn( + 'WARNING: dhclient did not produce expected files: dhclient.pid', + self.logs.getvalue()) + m_kill.assert_not_called() + + @mock.patch('cloudinit.net.dhcp.os.kill') + @mock.patch('cloudinit.net.dhcp.util.subp') def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill): """dhcp_discovery brings up the interface and runs dhclient. |