summaryrefslogtreecommitdiff
path: root/cloudinit/net
diff options
context:
space:
mode:
authorChad Smith <chad.smith@canonical.com>2017-11-30 13:09:30 -0700
committerScott Moser <smoser@brickies.net>2017-11-30 21:51:14 -0500
commit7acc9e68fafbbd7c56587aebe752ba6ba8c8a3db (patch)
tree521cc4ebca1fb7b66f6e1dba97e09b4861bb72d7 /cloudinit/net
parent88368f9851b29dddb5a12e4b21868cbdef906c5c (diff)
downloadvyos-cloud-init-7acc9e68fafbbd7c56587aebe752ba6ba8c8a3db.tar.gz
vyos-cloud-init-7acc9e68fafbbd7c56587aebe752ba6ba8c8a3db.zip
ec2: Fix sandboxed dhclient background process cleanup.
There is a race condition where our sandboxed dhclient properly writes a lease file but has not yet written a pid file. If the sandbox temporary directory is torn down before the dhclient subprocess writes a pidfile DataSourceEc2Local gets a traceback and the instance will fallback to DataSourceEc2 in the init-network stage. This wastes boot cycles we'd rather not spend. Fix handling of sandboxed dhclient to wait for both pidfile and leasefile before proceding. If either file doesn't show in 5 seconds, log a warning and return empty lease results {}. LP: #1735331
Diffstat (limited to 'cloudinit/net')
-rw-r--r--cloudinit/net/dhcp.py44
-rw-r--r--cloudinit/net/tests/test_dhcp.py66
2 files changed, 90 insertions, 20 deletions
diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py
index d8624d82..875a4609 100644
--- a/cloudinit/net/dhcp.py
+++ b/cloudinit/net/dhcp.py
@@ -36,22 +36,23 @@ def maybe_perform_dhcp_discovery(nic=None):
skip dhcp_discovery and return an empty dict.
@param nic: Name of the network interface we want to run dhclient on.
- @return: A dict of dhcp options from the dhclient discovery if run,
- otherwise an empty dict is returned.
+ @return: A list of dicts representing dhcp options for each lease obtained
+ from the dhclient discovery if run, otherwise an empty list is
+ returned.
"""
if nic is None:
nic = find_fallback_nic()
if nic is None:
LOG.debug('Skip dhcp_discovery: Unable to find fallback nic.')
- return {}
+ return []
elif nic not in get_devicelist():
LOG.debug(
'Skip dhcp_discovery: nic %s not found in get_devicelist.', nic)
- return {}
+ return []
dhclient_path = util.which('dhclient')
if not dhclient_path:
LOG.debug('Skip dhclient configuration: No dhclient command found.')
- return {}
+ return []
with temp_utils.tempdir(prefix='cloud-init-dhcp-', needs_exe=True) as tdir:
# Use /var/tmp because /run/cloud-init/tmp is mounted noexec
return dhcp_discovery(dhclient_path, nic, tdir)
@@ -60,8 +61,8 @@ def maybe_perform_dhcp_discovery(nic=None):
def parse_dhcp_lease_file(lease_file):
"""Parse the given dhcp lease file for the most recent lease.
- Return a dict of dhcp options as key value pairs for the most recent lease
- block.
+ Return a list of dicts of dhcp options. Each dict contains key value pairs
+ a specific lease in order from oldest to newest.
@raises: InvalidDHCPLeaseFileError on empty of unparseable leasefile
content.
@@ -96,8 +97,8 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir):
@param cleandir: The directory from which to run dhclient as well as store
dhcp leases.
- @return: A dict of dhcp options parsed from the dhcp.leases file or empty
- dict.
+ @return: A list of dicts of representing the dhcp leases parsed from the
+ dhcp.leases file or empty list.
"""
LOG.debug('Performing a dhcp discovery on %s', interface)
@@ -119,13 +120,26 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir):
cmd = [sandbox_dhclient_cmd, '-1', '-v', '-lf', lease_file,
'-pf', pid_file, interface, '-sf', '/bin/true']
util.subp(cmd, capture=True)
- pid = None
+
+ # dhclient doesn't write a pid file until after it forks when it gets a
+ # proper lease response. Since cleandir is a temp directory that gets
+ # removed, we need to wait for that pidfile creation before the
+ # cleandir is removed, otherwise we get FileNotFound errors.
+ missing = util.wait_for_files(
+ [pid_file, lease_file], maxwait=5, naplen=0.01)
+ if missing:
+ LOG.warning("dhclient did not produce expected files: %s",
+ ', '.join(os.path.basename(f) for f in missing))
+ return []
+ pid_content = util.load_file(pid_file).strip()
try:
- pid = int(util.load_file(pid_file).strip())
- return parse_dhcp_lease_file(lease_file)
- finally:
- if pid:
- os.kill(pid, signal.SIGKILL)
+ pid = int(pid_content)
+ except ValueError:
+ LOG.debug(
+ "pid file contains non-integer content '%s'", pid_content)
+ else:
+ os.kill(pid, signal.SIGKILL)
+ return parse_dhcp_lease_file(lease_file)
def networkd_parse_lease(content):
diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py
index 3d8e15c0..db25b6f2 100644
--- a/cloudinit/net/tests/test_dhcp.py
+++ b/cloudinit/net/tests/test_dhcp.py
@@ -1,6 +1,5 @@
# This file is part of cloud-init. See LICENSE file for license information.
-import mock
import os
import signal
from textwrap import dedent
@@ -9,7 +8,8 @@ from cloudinit.net.dhcp import (
InvalidDHCPLeaseFileError, maybe_perform_dhcp_discovery,
parse_dhcp_lease_file, dhcp_discovery, networkd_load_leases)
from cloudinit.util import ensure_file, write_file
-from cloudinit.tests.helpers import CiTestCase, wrap_and_call, populate_dir
+from cloudinit.tests.helpers import (
+ CiTestCase, mock, populate_dir, wrap_and_call)
class TestParseDHCPLeasesFile(CiTestCase):
@@ -69,14 +69,14 @@ class TestDHCPDiscoveryClean(CiTestCase):
def test_no_fallback_nic_found(self, m_fallback_nic):
"""Log and do nothing when nic is absent and no fallback is found."""
m_fallback_nic.return_value = None # No fallback nic found
- self.assertEqual({}, maybe_perform_dhcp_discovery())
+ self.assertEqual([], maybe_perform_dhcp_discovery())
self.assertIn(
'Skip dhcp_discovery: Unable to find fallback nic.',
self.logs.getvalue())
def test_provided_nic_does_not_exist(self):
"""When the provided nic doesn't exist, log a message and no-op."""
- self.assertEqual({}, maybe_perform_dhcp_discovery('idontexist'))
+ self.assertEqual([], maybe_perform_dhcp_discovery('idontexist'))
self.assertIn(
'Skip dhcp_discovery: nic idontexist not found in get_devicelist.',
self.logs.getvalue())
@@ -87,7 +87,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
"""When dhclient doesn't exist in the OS, log the issue and no-op."""
m_fallback.return_value = 'eth9'
m_which.return_value = None # dhclient isn't found
- self.assertEqual({}, maybe_perform_dhcp_discovery())
+ self.assertEqual([], maybe_perform_dhcp_discovery())
self.assertIn(
'Skip dhclient configuration: No dhclient command found.',
self.logs.getvalue())
@@ -117,6 +117,62 @@ class TestDHCPDiscoveryClean(CiTestCase):
@mock.patch('cloudinit.net.dhcp.os.kill')
@mock.patch('cloudinit.net.dhcp.util.subp')
+ def test_dhcp_discovery_run_in_sandbox_warns_invalid_pid(self, m_subp,
+ m_kill):
+ """dhcp_discovery logs a warning when pidfile contains invalid content.
+
+ Lease processing still occurs and no proc kill is attempted.
+ """
+ tmpdir = self.tmp_dir()
+ dhclient_script = os.path.join(tmpdir, 'dhclient.orig')
+ script_content = '#!/bin/bash\necho fake-dhclient'
+ write_file(dhclient_script, script_content, mode=0o755)
+ write_file(self.tmp_path('dhclient.pid', tmpdir), '') # Empty pid ''
+ lease_content = dedent("""
+ lease {
+ interface "eth9";
+ fixed-address 192.168.2.74;
+ option subnet-mask 255.255.255.0;
+ option routers 192.168.2.1;
+ }
+ """)
+ write_file(self.tmp_path('dhcp.leases', tmpdir), lease_content)
+
+ self.assertItemsEqual(
+ [{'interface': 'eth9', 'fixed-address': '192.168.2.74',
+ 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}],
+ dhcp_discovery(dhclient_script, 'eth9', tmpdir))
+ self.assertIn(
+ "pid file contains non-integer content ''", self.logs.getvalue())
+ m_kill.assert_not_called()
+
+ @mock.patch('cloudinit.net.dhcp.os.kill')
+ @mock.patch('cloudinit.net.dhcp.util.wait_for_files')
+ @mock.patch('cloudinit.net.dhcp.util.subp')
+ def test_dhcp_discovery_run_in_sandbox_waits_on_lease_and_pid(self,
+ m_subp,
+ m_wait,
+ m_kill):
+ """dhcp_discovery waits for the presence of pidfile and dhcp.leases."""
+ tmpdir = self.tmp_dir()
+ dhclient_script = os.path.join(tmpdir, 'dhclient.orig')
+ script_content = '#!/bin/bash\necho fake-dhclient'
+ write_file(dhclient_script, script_content, mode=0o755)
+ # Don't create pid or leases file
+ pidfile = self.tmp_path('dhclient.pid', tmpdir)
+ leasefile = self.tmp_path('dhcp.leases', tmpdir)
+ m_wait.return_value = [pidfile] # Return the missing pidfile wait for
+ self.assertEqual([], dhcp_discovery(dhclient_script, 'eth9', tmpdir))
+ self.assertEqual(
+ mock.call([pidfile, leasefile], maxwait=5, naplen=0.01),
+ m_wait.call_args_list[0])
+ self.assertIn(
+ 'WARNING: dhclient did not produce expected files: dhclient.pid',
+ self.logs.getvalue())
+ m_kill.assert_not_called()
+
+ @mock.patch('cloudinit.net.dhcp.os.kill')
+ @mock.patch('cloudinit.net.dhcp.util.subp')
def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill):
"""dhcp_discovery brings up the interface and runs dhclient.