From d5f855dd96ccbea77f61b0515b574ad2c43d116d Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Wed, 9 Aug 2017 21:55:52 -0600 Subject: ec2: Allow Ec2 to run in init-local using dhclient in a sandbox. This branch is a prerequisite for IPv6 support in AWS by allowing Ec2 datasource to query the metadata source version 2016-09-02 about whether or not it needs to configure IPv6 on interfaces. If version 2016-09-02 is not present, fallback to the min_metadata_version of 2009-04-04. The DataSourceEc2Local not run on FreeBSD because dhclient in doesn't support the -sf flag allowing us to run dhclient without filesystem side-effects. To query AWS' metadata address @ 169.254.169.254, the instance must have a dhcp-allocated address configured. Configuring IPv4 link-local addresses result in timeouts from the metadata service. We introduced a DataSourceEc2Local subclass which will perform a sandboxed dhclient discovery which obtains an authorized IP address on eth0 and crawl metadata about full instance network configuration. Since ec2 IPv6 metadata is not sufficient in itself to tell us all the ipv6 knownledge we need, it only be used as a boolean to tell us which nics need IPv6. Cloud-init will then configure desired interfaces to DHCPv6 versus DHCPv4. Performance side note: Shifting the dhcp work into init-local for Ec2 actually gets us 1 second faster deployments by skipping init-network phase of alternate datasource checks because Ec2Local is configured in an ealier boot stage. In 3 test runs prior to this change: cloud-init runs were 5.5 seconds, with the change we now average 4.6 seconds. This efficiency could be even further improved if we avoiding dhcp discovery in order to talk to the metadata service from an AWS authorized dhcp address if there were some way to advertize the dhcp configuration via DMI/SMBIOS or system environment variables. Inspecting time costs of the dhclient setup/teardown in 3 live runs the time cost for the dhcp setup round trip on AWS is: test 1: 76 milliseconds dhcp discovery + metadata: 0.347 seconds metadata alone: 0.271 seconds test 2: 88 milliseconds dhcp discovery + metadata: 0.388 seconds metadata alone: 0.300 seconds test 3: 75 milliseconds dhcp discovery + metadata: 0.366 seconds metadata alone: 0.291 seconds LP: #1709772 --- cloudinit/net/__init__.py | 35 ++++++---- cloudinit/net/dhcp.py | 119 ++++++++++++++++++++++++++++++++ cloudinit/net/tests/test_dhcp.py | 144 +++++++++++++++++++++++++++++++++++++++ cloudinit/net/tests/test_init.py | 2 +- 4 files changed, 284 insertions(+), 16 deletions(-) create mode 100644 cloudinit/net/dhcp.py create mode 100644 cloudinit/net/tests/test_dhcp.py (limited to 'cloudinit/net') diff --git a/cloudinit/net/__init__.py b/cloudinit/net/__init__.py index 1ff8fae0..a1b0db10 100644 --- a/cloudinit/net/__init__.py +++ b/cloudinit/net/__init__.py @@ -175,13 +175,8 @@ def is_disabled_cfg(cfg): return cfg.get('config') == "disabled" -def generate_fallback_config(blacklist_drivers=None, config_driver=None): - """Determine which attached net dev is most likely to have a connection and - generate network state to run dhcp on that interface""" - - if not config_driver: - config_driver = False - +def find_fallback_nic(blacklist_drivers=None): + """Return the name of the 'fallback' network device.""" if not blacklist_drivers: blacklist_drivers = [] @@ -233,15 +228,24 @@ def generate_fallback_config(blacklist_drivers=None, config_driver=None): if DEFAULT_PRIMARY_INTERFACE in names: names.remove(DEFAULT_PRIMARY_INTERFACE) names.insert(0, DEFAULT_PRIMARY_INTERFACE) - target_name = None - target_mac = None + + # pick the first that has a mac-address for name in names: - mac = read_sys_net_safe(name, 'address') - if mac: - target_name = name - target_mac = mac - break - if target_mac and target_name: + if read_sys_net_safe(name, 'address'): + return name + return None + + +def generate_fallback_config(blacklist_drivers=None, config_driver=None): + """Determine which attached net dev is most likely to have a connection and + generate network state to run dhcp on that interface""" + + if not config_driver: + config_driver = False + + target_name = find_fallback_nic(blacklist_drivers=blacklist_drivers) + if target_name: + target_mac = read_sys_net_safe(target_name, 'address') nconf = {'config': [], 'version': 1} cfg = {'type': 'physical', 'name': target_name, 'mac_address': target_mac, 'subnets': [{'type': 'dhcp'}]} @@ -585,6 +589,7 @@ class EphemeralIPv4Network(object): self._bringup_router() def __exit__(self, excp_type, excp_value, excp_traceback): + """Teardown anything we set up.""" for cmd in self.cleanup_cmds: util.subp(cmd, capture=True) diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py new file mode 100644 index 00000000..c7febc57 --- /dev/null +++ b/cloudinit/net/dhcp.py @@ -0,0 +1,119 @@ +# Copyright (C) 2017 Canonical Ltd. +# +# Author: Chad Smith +# +# This file is part of cloud-init. See LICENSE file for license information. + +import logging +import os +import re + +from cloudinit.net import find_fallback_nic, get_devicelist +from cloudinit import util + +LOG = logging.getLogger(__name__) + + +class InvalidDHCPLeaseFileError(Exception): + """Raised when parsing an empty or invalid dhcp.leases file. + + Current uses are DataSourceAzure and DataSourceEc2 during ephemeral + boot to scrape metadata. + """ + pass + + +def maybe_perform_dhcp_discovery(nic=None): + """Perform dhcp discovery if nic valid and dhclient command exists. + + If the nic is invalid or undiscoverable or dhclient command is not found, + skip dhcp_discovery and return an empty dict. + + @param nic: Name of the network interface we want to run dhclient on. + @return: A dict of dhcp options from the dhclient discovery if run, + otherwise an empty dict is returned. + """ + if nic is None: + nic = find_fallback_nic() + if nic is None: + LOG.debug( + 'Skip dhcp_discovery: Unable to find fallback nic.') + return {} + elif nic not in get_devicelist(): + LOG.debug( + 'Skip dhcp_discovery: nic %s not found in get_devicelist.', nic) + return {} + dhclient_path = util.which('dhclient') + if not dhclient_path: + LOG.debug('Skip dhclient configuration: No dhclient command found.') + return {} + with util.tempdir(prefix='cloud-init-dhcp-') as tmpdir: + return dhcp_discovery(dhclient_path, nic, tmpdir) + + +def parse_dhcp_lease_file(lease_file): + """Parse the given dhcp lease file for the most recent lease. + + Return a dict of dhcp options as key value pairs for the most recent lease + block. + + @raises: InvalidDHCPLeaseFileError on empty of unparseable leasefile + content. + """ + lease_regex = re.compile(r"lease {(?P[^}]*)}\n") + dhcp_leases = [] + lease_content = util.load_file(lease_file) + if len(lease_content) == 0: + raise InvalidDHCPLeaseFileError( + 'Cannot parse empty dhcp lease file {0}'.format(lease_file)) + for lease in lease_regex.findall(lease_content): + lease_options = [] + for line in lease.split(';'): + # Strip newlines, double-quotes and option prefix + line = line.strip().replace('"', '').replace('option ', '') + if not line: + continue + lease_options.append(line.split(' ', 1)) + dhcp_leases.append(dict(lease_options)) + if not dhcp_leases: + raise InvalidDHCPLeaseFileError( + 'Cannot parse dhcp lease file {0}. No leases found'.format( + lease_file)) + return dhcp_leases + + +def dhcp_discovery(dhclient_cmd_path, interface, cleandir): + """Run dhclient on the interface without scripts or filesystem artifacts. + + @param dhclient_cmd_path: Full path to the dhclient used. + @param interface: Name of the network inteface on which to dhclient. + @param cleandir: The directory from which to run dhclient as well as store + dhcp leases. + + @return: A dict of dhcp options parsed from the dhcp.leases file or empty + dict. + """ + LOG.debug('Performing a dhcp discovery on %s', interface) + + # XXX We copy dhclient out of /sbin/dhclient to avoid dealing with strict + # app armor profiles which disallow running dhclient -sf . + # We want to avoid running /sbin/dhclient-script because of side-effects in + # /etc/resolv.conf any any other vendor specific scripts in + # /etc/dhcp/dhclient*hooks.d. + sandbox_dhclient_cmd = os.path.join(cleandir, 'dhclient') + util.copy(dhclient_cmd_path, sandbox_dhclient_cmd) + pid_file = os.path.join(cleandir, 'dhclient.pid') + lease_file = os.path.join(cleandir, 'dhcp.leases') + + # ISC dhclient needs the interface up to send initial discovery packets. + # Generally dhclient relies on dhclient-script PREINIT action to bring the + # link up before attempting discovery. Since we are using -sf /bin/true, + # we need to do that "link up" ourselves first. + util.subp(['ip', 'link', 'set', 'dev', interface, 'up'], capture=True) + cmd = [sandbox_dhclient_cmd, '-1', '-v', '-lf', lease_file, + '-pf', pid_file, interface, '-sf', '/bin/true'] + util.subp(cmd, capture=True) + return parse_dhcp_lease_file(lease_file) + + +# vi: ts=4 expandtab diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py new file mode 100644 index 00000000..47d8d461 --- /dev/null +++ b/cloudinit/net/tests/test_dhcp.py @@ -0,0 +1,144 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +import mock +import os +from textwrap import dedent + +from cloudinit.net.dhcp import ( + InvalidDHCPLeaseFileError, maybe_perform_dhcp_discovery, + parse_dhcp_lease_file, dhcp_discovery) +from cloudinit.util import ensure_file, write_file +from tests.unittests.helpers import CiTestCase + + +class TestParseDHCPLeasesFile(CiTestCase): + + def test_parse_empty_lease_file_errors(self): + """parse_dhcp_lease_file errors when file content is empty.""" + empty_file = self.tmp_path('leases') + ensure_file(empty_file) + with self.assertRaises(InvalidDHCPLeaseFileError) as context_manager: + parse_dhcp_lease_file(empty_file) + error = context_manager.exception + self.assertIn('Cannot parse empty dhcp lease file', str(error)) + + def test_parse_malformed_lease_file_content_errors(self): + """parse_dhcp_lease_file errors when file content isn't dhcp leases.""" + non_lease_file = self.tmp_path('leases') + write_file(non_lease_file, 'hi mom.') + with self.assertRaises(InvalidDHCPLeaseFileError) as context_manager: + parse_dhcp_lease_file(non_lease_file) + error = context_manager.exception + self.assertIn('Cannot parse dhcp lease file', str(error)) + + def test_parse_multiple_leases(self): + """parse_dhcp_lease_file returns a list of all leases within.""" + lease_file = self.tmp_path('leases') + content = dedent(""" + lease { + interface "wlp3s0"; + fixed-address 192.168.2.74; + option subnet-mask 255.255.255.0; + option routers 192.168.2.1; + renew 4 2017/07/27 18:02:30; + expire 5 2017/07/28 07:08:15; + } + lease { + interface "wlp3s0"; + fixed-address 192.168.2.74; + option subnet-mask 255.255.255.0; + option routers 192.168.2.1; + } + """) + expected = [ + {'interface': 'wlp3s0', 'fixed-address': '192.168.2.74', + 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1', + 'renew': '4 2017/07/27 18:02:30', + 'expire': '5 2017/07/28 07:08:15'}, + {'interface': 'wlp3s0', 'fixed-address': '192.168.2.74', + 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}] + write_file(lease_file, content) + self.assertItemsEqual(expected, parse_dhcp_lease_file(lease_file)) + + +class TestDHCPDiscoveryClean(CiTestCase): + with_logs = True + + @mock.patch('cloudinit.net.dhcp.find_fallback_nic') + def test_no_fallback_nic_found(self, m_fallback_nic): + """Log and do nothing when nic is absent and no fallback is found.""" + m_fallback_nic.return_value = None # No fallback nic found + self.assertEqual({}, maybe_perform_dhcp_discovery()) + self.assertIn( + 'Skip dhcp_discovery: Unable to find fallback nic.', + self.logs.getvalue()) + + def test_provided_nic_does_not_exist(self): + """When the provided nic doesn't exist, log a message and no-op.""" + self.assertEqual({}, maybe_perform_dhcp_discovery('idontexist')) + self.assertIn( + 'Skip dhcp_discovery: nic idontexist not found in get_devicelist.', + self.logs.getvalue()) + + @mock.patch('cloudinit.net.dhcp.util.which') + @mock.patch('cloudinit.net.dhcp.find_fallback_nic') + def test_absent_dhclient_command(self, m_fallback, m_which): + """When dhclient doesn't exist in the OS, log the issue and no-op.""" + m_fallback.return_value = 'eth9' + m_which.return_value = None # dhclient isn't found + self.assertEqual({}, maybe_perform_dhcp_discovery()) + self.assertIn( + 'Skip dhclient configuration: No dhclient command found.', + self.logs.getvalue()) + + @mock.patch('cloudinit.net.dhcp.dhcp_discovery') + @mock.patch('cloudinit.net.dhcp.util.which') + @mock.patch('cloudinit.net.dhcp.find_fallback_nic') + def test_dhclient_run_with_tmpdir(self, m_fallback, m_which, m_dhcp): + """maybe_perform_dhcp_discovery passes tmpdir to dhcp_discovery.""" + m_fallback.return_value = 'eth9' + m_which.return_value = '/sbin/dhclient' + m_dhcp.return_value = {'address': '192.168.2.2'} + self.assertEqual( + {'address': '192.168.2.2'}, maybe_perform_dhcp_discovery()) + m_dhcp.assert_called_once() + call = m_dhcp.call_args_list[0] + self.assertEqual('/sbin/dhclient', call[0][0]) + self.assertEqual('eth9', call[0][1]) + self.assertIn('/tmp/cloud-init-dhcp-', call[0][2]) + + @mock.patch('cloudinit.net.dhcp.util.subp') + def test_dhcp_discovery_run_in_sandbox(self, m_subp): + """dhcp_discovery brings up the interface and runs dhclient. + + It also returns the parsed dhcp.leases file generated in the sandbox. + """ + tmpdir = self.tmp_dir() + dhclient_script = os.path.join(tmpdir, 'dhclient.orig') + script_content = '#!/bin/bash\necho fake-dhclient' + write_file(dhclient_script, script_content, mode=0o755) + lease_content = dedent(""" + lease { + interface "eth9"; + fixed-address 192.168.2.74; + option subnet-mask 255.255.255.0; + option routers 192.168.2.1; + } + """) + lease_file = os.path.join(tmpdir, 'dhcp.leases') + write_file(lease_file, lease_content) + self.assertItemsEqual( + [{'interface': 'eth9', 'fixed-address': '192.168.2.74', + 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}], + dhcp_discovery(dhclient_script, 'eth9', tmpdir)) + # dhclient script got copied + with open(os.path.join(tmpdir, 'dhclient')) as stream: + self.assertEqual(script_content, stream.read()) + # Interface was brought up before dhclient called from sandbox + m_subp.assert_has_calls([ + mock.call( + ['ip', 'link', 'set', 'dev', 'eth9', 'up'], capture=True), + mock.call( + [os.path.join(tmpdir, 'dhclient'), '-1', '-v', '-lf', + lease_file, '-pf', os.path.join(tmpdir, 'dhclient.pid'), + 'eth9', '-sf', '/bin/true'], capture=True)]) diff --git a/cloudinit/net/tests/test_init.py b/cloudinit/net/tests/test_init.py index 272a6ebd..cc052a7d 100644 --- a/cloudinit/net/tests/test_init.py +++ b/cloudinit/net/tests/test_init.py @@ -414,7 +414,7 @@ class TestEphemeralIPV4Network(CiTestCase): self.assertIn('Cannot init network on', str(error)) self.assertEqual(0, m_subp.call_count) - def test_ephemeral_ipv4_network_errors_invalid_mask(self, m_subp): + def test_ephemeral_ipv4_network_errors_invalid_mask_prefix(self, m_subp): """Raise an error when prefix_or_mask is not a netmask or prefix.""" params = { 'interface': 'eth0', 'ip': '192.168.2.2', -- cgit v1.2.3