From d3a0958c09c73a78fda6e922b749a1b98036e984 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Sun, 19 Nov 2017 16:27:06 -0700 Subject: EC2: Kill dhclient process used in sandbox dhclient. dhclient runs, obtains a address and then backgrounds itself. cloud-init did not take care to kill it after it was done with it. After it has run and created the leases, we can kill it. LP: #1732964 --- cloudinit/net/dhcp.py | 9 ++++++++- cloudinit/net/tests/test_dhcp.py | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'cloudinit/net') diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py index 0cba7032..f3a412a9 100644 --- a/cloudinit/net/dhcp.py +++ b/cloudinit/net/dhcp.py @@ -8,6 +8,7 @@ import configobj import logging import os import re +import signal from cloudinit.net import find_fallback_nic, get_devicelist from cloudinit import temp_utils @@ -119,7 +120,13 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir): cmd = [sandbox_dhclient_cmd, '-1', '-v', '-lf', lease_file, '-pf', pid_file, interface, '-sf', '/bin/true'] util.subp(cmd, capture=True) - return parse_dhcp_lease_file(lease_file) + pid = None + try: + pid = int(util.load_file(pid_file).strip()) + return parse_dhcp_lease_file(lease_file) + finally: + if pid: + os.kill(pid, signal.SIGKILL) def networkd_parse_lease(content): diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py index 1c1f504a..3d8e15c0 100644 --- a/cloudinit/net/tests/test_dhcp.py +++ b/cloudinit/net/tests/test_dhcp.py @@ -2,6 +2,7 @@ import mock import os +import signal from textwrap import dedent from cloudinit.net.dhcp import ( @@ -114,8 +115,9 @@ class TestDHCPDiscoveryClean(CiTestCase): self.assertEqual('eth9', call[0][1]) self.assertIn('/var/tmp/cloud-init/cloud-init-dhcp-', call[0][2]) + @mock.patch('cloudinit.net.dhcp.os.kill') @mock.patch('cloudinit.net.dhcp.util.subp') - def test_dhcp_discovery_run_in_sandbox(self, m_subp): + def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill): """dhcp_discovery brings up the interface and runs dhclient. It also returns the parsed dhcp.leases file generated in the sandbox. @@ -134,6 +136,10 @@ class TestDHCPDiscoveryClean(CiTestCase): """) lease_file = os.path.join(tmpdir, 'dhcp.leases') write_file(lease_file, lease_content) + pid_file = os.path.join(tmpdir, 'dhclient.pid') + my_pid = 1 + write_file(pid_file, "%d\n" % my_pid) + self.assertItemsEqual( [{'interface': 'eth9', 'fixed-address': '192.168.2.74', 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}], @@ -149,6 +155,7 @@ class TestDHCPDiscoveryClean(CiTestCase): [os.path.join(tmpdir, 'dhclient'), '-1', '-v', '-lf', lease_file, '-pf', os.path.join(tmpdir, 'dhclient.pid'), 'eth9', '-sf', '/bin/true'], capture=True)]) + m_kill.assert_has_calls([mock.call(my_pid, signal.SIGKILL)]) class TestSystemdParseLeases(CiTestCase): -- cgit v1.2.3 From 281a82181716183d526e76f4e0415e0a6f680cbe Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Mon, 20 Nov 2017 15:56:40 -0500 Subject: EC2: Fix bug using fallback_nic and metadata when restoring from cache. If user upgraded to new cloud-init and attempted to run 'cloud-init init' without rebooting, cloud-init restores the datasource object from pickle. The older version pickled datasource object had no value for _network_config or fallback_nic. This caused the Ec2 datasource to attempt to reconfigure networking with a None fallback_nic. The pickled object also cached an older version of ec2 metadata which didn't contain network information. This branch does two things: - Add a fallback_interface property to DatasourceEC2 to support reading the old .fallback_nic attribute if it was set. New versions will call net.find_fallback_nic() if there has not been one found. - Re-crawl metadata if we are on Ec2 and don't have a 'network' key in metadata LP: #1732917 --- cloudinit/net/dhcp.py | 3 +- cloudinit/sources/DataSourceEc2.py | 44 +++++++++++++++++++++-------- tests/unittests/test_datasource/test_ec2.py | 33 ++++++++++++++++++++++ 3 files changed, 67 insertions(+), 13 deletions(-) (limited to 'cloudinit/net') diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py index f3a412a9..d8624d82 100644 --- a/cloudinit/net/dhcp.py +++ b/cloudinit/net/dhcp.py @@ -42,8 +42,7 @@ def maybe_perform_dhcp_discovery(nic=None): if nic is None: nic = find_fallback_nic() if nic is None: - LOG.debug( - 'Skip dhcp_discovery: Unable to find fallback nic.') + LOG.debug('Skip dhcp_discovery: Unable to find fallback nic.') return {} elif nic not in get_devicelist(): LOG.debug( diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 0ef22174..7bbbfb63 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -65,7 +65,7 @@ class DataSourceEc2(sources.DataSource): get_network_metadata = False # Track the discovered fallback nic for use in configuration generation. - fallback_nic = None + _fallback_interface = None def __init__(self, sys_cfg, distro, paths): sources.DataSource.__init__(self, sys_cfg, distro, paths) @@ -92,18 +92,17 @@ class DataSourceEc2(sources.DataSource): elif self.cloud_platform == Platforms.NO_EC2_METADATA: return False - self.fallback_nic = net.find_fallback_nic() if self.get_network_metadata: # Setup networking in init-local stage. if util.is_FreeBSD(): LOG.debug("FreeBSD doesn't support running dhclient with -sf") return False - dhcp_leases = dhcp.maybe_perform_dhcp_discovery(self.fallback_nic) + dhcp_leases = dhcp.maybe_perform_dhcp_discovery( + self.fallback_interface) if not dhcp_leases: # DataSourceEc2Local failed in init-local stage. DataSourceEc2 # will still run in init-network stage. return False dhcp_opts = dhcp_leases[-1] - self.fallback_nic = dhcp_opts.get('interface') net_params = {'interface': dhcp_opts.get('interface'), 'ip': dhcp_opts.get('fixed-address'), 'prefix_or_mask': dhcp_opts.get('subnet-mask'), @@ -301,21 +300,44 @@ class DataSourceEc2(sources.DataSource): return None result = None - net_md = self.metadata.get('network') + no_network_metadata_on_aws = bool( + 'network' not in self.metadata and + self.cloud_platform == Platforms.AWS) + if no_network_metadata_on_aws: + LOG.debug("Metadata 'network' not present:" + " Refreshing stale metadata from prior to upgrade.") + util.log_time( + logfunc=LOG.debug, msg='Re-crawl of metadata service', + func=self._crawl_metadata) + # Limit network configuration to only the primary/fallback nic - macs_to_nics = { - net.get_interface_mac(self.fallback_nic): self.fallback_nic} + iface = self.fallback_interface + macs_to_nics = {net.get_interface_mac(iface): iface} + net_md = self.metadata.get('network') if isinstance(net_md, dict): result = convert_ec2_metadata_network_config( - net_md, macs_to_nics=macs_to_nics, - fallback_nic=self.fallback_nic) + net_md, macs_to_nics=macs_to_nics, fallback_nic=iface) else: - LOG.warning("unexpected metadata 'network' key not valid: %s", - net_md) + LOG.warning("Metadata 'network' key not valid: %s.", net_md) self._network_config = result return self._network_config + @property + def fallback_interface(self): + if self._fallback_interface is None: + # fallback_nic was used at one point, so restored objects may + # have an attribute there. respect that if found. + _legacy_fbnic = getattr(self, 'fallback_nic', None) + if _legacy_fbnic: + self._fallback_interface = _legacy_fbnic + self.fallback_nic = None + else: + self._fallback_interface = net.find_fallback_nic() + if self._fallback_interface is None: + LOG.warning("Did not find a fallback interface on EC2.") + return self._fallback_interface + def _crawl_metadata(self): """Crawl metadata service when available. diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py index 6af699a6..ba328ee9 100644 --- a/tests/unittests/test_datasource/test_ec2.py +++ b/tests/unittests/test_datasource/test_ec2.py @@ -305,6 +305,39 @@ class TestEc2(test_helpers.HttprettyTestCase): ds._network_config = {'cached': 'data'} self.assertEqual({'cached': 'data'}, ds.network_config) + @httpretty.activate + @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') + def test_network_config_cached_property_refreshed_on_upgrade(self, m_dhcp): + """Refresh the network_config Ec2 cache if network key is absent. + + This catches an upgrade issue where obj.pkl contained stale metadata + which lacked newly required network key. + """ + old_metadata = copy.deepcopy(DEFAULT_METADATA) + old_metadata.pop('network') + ds = self._setup_ds( + platform_data=self.valid_platform_data, + sys_cfg={'datasource': {'Ec2': {'strict_id': True}}}, + md=old_metadata) + self.assertTrue(ds.get_data()) + # Provide new revision of metadata that contains network data + register_mock_metaserver( + 'http://169.254.169.254/2009-04-04/meta-data/', DEFAULT_METADATA) + mac1 = '06:17:04:d7:26:09' # Defined in DEFAULT_METADATA + get_interface_mac_path = ( + 'cloudinit.sources.DataSourceEc2.net.get_interface_mac') + ds.fallback_nic = 'eth9' + with mock.patch(get_interface_mac_path) as m_get_interface_mac: + m_get_interface_mac.return_value = mac1 + ds.network_config # Will re-crawl network metadata + self.assertIn('Re-crawl of metadata service', self.logs.getvalue()) + expected = {'version': 1, 'config': [ + {'mac_address': '06:17:04:d7:26:09', + 'name': 'eth9', + 'subnets': [{'type': 'dhcp4'}, {'type': 'dhcp6'}], + 'type': 'physical'}]} + self.assertEqual(expected, ds.network_config) + @httpretty.activate @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') def test_valid_platform_with_strict_true(self, m_dhcp): -- cgit v1.2.3