From e56b55452549cb037da0a4165154ffa494e9678a Mon Sep 17 00:00:00 2001 From: Thomas Stringer Date: Thu, 10 Sep 2020 14:29:54 -0400 Subject: Retrieve SSH keys from IMDS first with OVF as a fallback (#509) * pull ssh keys from imds first and fall back to ovf if unavailable * refactor log and diagnostic messages * refactor the OpenSSLManager instantiation and certificate usage * fix unit test where exception was being silenced for generate cert * fix tests now that certificate is not always generated * add documentation for ssh key retrieval * add ability to check if http client has security enabled * refactor certificate logic to GoalState --- tests/unittests/test_datasource/test_azure.py | 64 ++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 10 deletions(-) (limited to 'tests/unittests/test_datasource/test_azure.py') diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 47e03bd1..2dda9925 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -102,7 +102,13 @@ NETWORK_METADATA = { "vmId": "ff702a6b-cb6a-4fcd-ad68-b4ce38227642", "vmScaleSetName": "", "vmSize": "Standard_DS1_v2", - "zone": "" + "zone": "", + "publicKeys": [ + { + "keyData": "key1", + "path": "path1" + } + ] }, "network": { "interface": [ @@ -302,7 +308,7 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): def setUp(self): super(TestGetMetadataFromIMDS, self).setUp() - self.network_md_url = dsaz.IMDS_URL + "instance?api-version=2017-12-01" + self.network_md_url = dsaz.IMDS_URL + "instance?api-version=2019-06-01" @mock.patch(MOCKPATH + 'readurl') @mock.patch(MOCKPATH + 'EphemeralDHCPv4') @@ -1304,6 +1310,40 @@ scbus-1 on xpt0 bus 0 dsaz.get_hostname(hostname_command=("hostname",)) m_subp.assert_called_once_with(("hostname",), capture=True) + @mock.patch( + 'cloudinit.sources.helpers.azure.OpenSSLManager.parse_certificates') + def test_get_public_ssh_keys_with_imds(self, 
m_parse_certificates): + sys_cfg = {'datasource': {'Azure': {'apply_network_config': True}}} + odata = {'HostName': "myhost", 'UserName': "myuser"} + data = { + 'ovfcontent': construct_valid_ovf_env(data=odata), + 'sys_cfg': sys_cfg + } + dsrc = self._get_ds(data) + dsrc.get_data() + dsrc.setup(True) + ssh_keys = dsrc.get_public_ssh_keys() + self.assertEqual(ssh_keys, ['key1']) + self.assertEqual(m_parse_certificates.call_count, 0) + + @mock.patch(MOCKPATH + 'get_metadata_from_imds') + def test_get_public_ssh_keys_without_imds( + self, + m_get_metadata_from_imds): + m_get_metadata_from_imds.return_value = dict() + sys_cfg = {'datasource': {'Azure': {'apply_network_config': True}}} + odata = {'HostName': "myhost", 'UserName': "myuser"} + data = { + 'ovfcontent': construct_valid_ovf_env(data=odata), + 'sys_cfg': sys_cfg + } + dsrc = self._get_ds(data) + dsaz.get_metadata_from_fabric.return_value = {'public-keys': ['key2']} + dsrc.get_data() + dsrc.setup(True) + ssh_keys = dsrc.get_public_ssh_keys() + self.assertEqual(ssh_keys, ['key2']) + class TestAzureBounce(CiTestCase): @@ -2094,14 +2134,18 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): md, _ud, cfg, _d = dsa._reprovision() self.assertEqual(md['local-hostname'], hostname) self.assertEqual(cfg['system_info']['default_user']['name'], username) - self.assertEqual(fake_resp.call_args_list, - [mock.call(allow_redirects=True, - headers={'Metadata': 'true', - 'User-Agent': - 'Cloud-Init/%s' % vs()}, - method='GET', - timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS, - url=full_url)]) + self.assertIn( + mock.call( + allow_redirects=True, + headers={ + 'Metadata': 'true', + 'User-Agent': 'Cloud-Init/%s' % vs() + }, + method='GET', + timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS, + url=full_url + ), + fake_resp.call_args_list) self.assertEqual(m_dhcp.call_count, 2) m_net.assert_any_call( broadcast='192.168.2.255', interface='eth9', ip='192.168.2.9', -- cgit v1.2.3 From 43164902dc97cc0c51ca1b200fa09c9303a4beee Mon Sep 17 00:00:00 2001 
From: Johnson Shi Date: Thu, 24 Sep 2020 09:46:19 -0700 Subject: Azure parse_network_config uses fallback cfg when generate IMDS network cfg fails (#549) Azure datasource's `parse_network_config` throws a fatal uncaught exception when an exception is raised during generation of network config from IMDS metadata. This happens when IMDS metadata is invalid/corrupted (such as when it is missing network or interface metadata). This causes the rest of provisioning to fail. This changes `parse_network_config` to be a non-fatal implementation. Additionally, when generating network config from IMDS metadata fails, fall back on generating fallback network config (`_generate_network_config_from_fallback_config`). This also changes fallback network config generation (`_generate_network_config_from_fallback_config`) to blacklist an additional driver: `mlx5_core`. --- cloudinit/sources/DataSourceAzure.py | 154 +++++++++++++++----------- tests/unittests/test_datasource/test_azure.py | 130 ++++++++++++---------- 2 files changed, 164 insertions(+), 120 deletions(-) (limited to 'tests/unittests/test_datasource/test_azure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index e98fd497..773c60d7 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -1387,76 +1387,104 @@ def load_azure_ds_dir(source_dir): return (md, ud, cfg, {'ovf-env.xml': contents}) -def parse_network_config(imds_metadata): +@azure_ds_telemetry_reporter +def parse_network_config(imds_metadata) -> dict: """Convert imds_metadata dictionary to network v2 configuration. - Parses network configuration from imds metadata if present or generate fallback network config excluding mlx4_core devices. @param: imds_metadata: Dict of content read from IMDS network service. @return: Dictionary containing network version 2 standard configuration. 
""" - with events.ReportEventStack( - name="parse_network_config", - description="", - parent=azure_ds_reporter - ) as evt: - if imds_metadata != sources.UNSET and imds_metadata: - netconfig = {'version': 2, 'ethernets': {}} - LOG.debug('Azure: generating network configuration from IMDS') - network_metadata = imds_metadata['network'] - for idx, intf in enumerate(network_metadata['interface']): - # First IPv4 and/or IPv6 address will be obtained via DHCP. - # Any additional IPs of each type will be set as static - # addresses. - nicname = 'eth{idx}'.format(idx=idx) - dhcp_override = {'route-metric': (idx + 1) * 100} - dev_config = {'dhcp4': True, 'dhcp4-overrides': dhcp_override, - 'dhcp6': False} - for addr_type in ('ipv4', 'ipv6'): - addresses = intf.get(addr_type, {}).get('ipAddress', []) - if addr_type == 'ipv4': - default_prefix = '24' - else: - default_prefix = '128' - if addresses: - dev_config['dhcp6'] = True - # non-primary interfaces should have a higher - # route-metric (cost) so default routes prefer - # primary nic due to lower route-metric value - dev_config['dhcp6-overrides'] = dhcp_override - for addr in addresses[1:]: - # Append static address config for ip > 1 - netPrefix = intf[addr_type]['subnet'][0].get( - 'prefix', default_prefix) - privateIp = addr['privateIpAddress'] - if not dev_config.get('addresses'): - dev_config['addresses'] = [] - dev_config['addresses'].append( - '{ip}/{prefix}'.format( - ip=privateIp, prefix=netPrefix)) - if dev_config: - mac = ':'.join(re.findall(r'..', intf['macAddress'])) - dev_config.update({ - 'match': {'macaddress': mac.lower()}, - 'set-name': nicname - }) - # With netvsc, we can get two interfaces that - # share the same MAC, so we need to make sure - # our match condition also contains the driver - driver = device_driver(nicname) - if driver and driver == 'hv_netvsc': - dev_config['match']['driver'] = driver - netconfig['ethernets'][nicname] = dev_config - evt.description = "network config from imds" - else: - 
blacklist = ['mlx4_core'] - LOG.debug('Azure: generating fallback configuration') - # generate a network config, blacklist picking mlx4_core devs - netconfig = net.generate_fallback_config( - blacklist_drivers=blacklist, config_driver=True) - evt.description = "network config from fallback" - return netconfig + if imds_metadata != sources.UNSET and imds_metadata: + try: + return _generate_network_config_from_imds_metadata(imds_metadata) + except Exception as e: + LOG.error( + 'Failed generating network config ' + 'from IMDS network metadata: %s', str(e)) + try: + return _generate_network_config_from_fallback_config() + except Exception as e: + LOG.error('Failed generating fallback network config: %s', str(e)) + return {} + + +@azure_ds_telemetry_reporter +def _generate_network_config_from_imds_metadata(imds_metadata) -> dict: + """Convert imds_metadata dictionary to network v2 configuration. + Parses network configuration from imds metadata. + + @param: imds_metadata: Dict of content read from IMDS network service. + @return: Dictionary containing network version 2 standard configuration. + """ + netconfig = {'version': 2, 'ethernets': {}} + network_metadata = imds_metadata['network'] + for idx, intf in enumerate(network_metadata['interface']): + # First IPv4 and/or IPv6 address will be obtained via DHCP. + # Any additional IPs of each type will be set as static + # addresses. 
+ nicname = 'eth{idx}'.format(idx=idx) + dhcp_override = {'route-metric': (idx + 1) * 100} + dev_config = {'dhcp4': True, 'dhcp4-overrides': dhcp_override, + 'dhcp6': False} + for addr_type in ('ipv4', 'ipv6'): + addresses = intf.get(addr_type, {}).get('ipAddress', []) + if addr_type == 'ipv4': + default_prefix = '24' + else: + default_prefix = '128' + if addresses: + dev_config['dhcp6'] = True + # non-primary interfaces should have a higher + # route-metric (cost) so default routes prefer + # primary nic due to lower route-metric value + dev_config['dhcp6-overrides'] = dhcp_override + for addr in addresses[1:]: + # Append static address config for ip > 1 + netPrefix = intf[addr_type]['subnet'][0].get( + 'prefix', default_prefix) + privateIp = addr['privateIpAddress'] + if not dev_config.get('addresses'): + dev_config['addresses'] = [] + dev_config['addresses'].append( + '{ip}/{prefix}'.format( + ip=privateIp, prefix=netPrefix)) + if dev_config: + mac = ':'.join(re.findall(r'..', intf['macAddress'])) + dev_config.update({ + 'match': {'macaddress': mac.lower()}, + 'set-name': nicname + }) + # With netvsc, we can get two interfaces that + # share the same MAC, so we need to make sure + # our match condition also contains the driver + driver = device_driver(nicname) + if driver and driver == 'hv_netvsc': + dev_config['match']['driver'] = driver + netconfig['ethernets'][nicname] = dev_config + return netconfig + + +@azure_ds_telemetry_reporter +def _generate_network_config_from_fallback_config() -> dict: + """Generate fallback network config excluding mlx4_core & mlx5_core devices. + + @return: Dictionary containing network version 2 standard configuration. + """ + # Azure Dv4 and Ev4 series VMs always have mlx5 hardware. 
+ # https://docs.microsoft.com/en-us/azure/virtual-machines/dv4-dsv4-series + # https://docs.microsoft.com/en-us/azure/virtual-machines/ev4-esv4-series + # Earlier D and E series VMs (such as Dv2, Dv3, and Ev3 series VMs) + # can have either mlx4 or mlx5 hardware, with the older series VMs + # having a higher chance of coming with mlx4 hardware. + # https://docs.microsoft.com/en-us/azure/virtual-machines/dv2-dsv2-series + # https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series + # https://docs.microsoft.com/en-us/azure/virtual-machines/ev3-esv3-series + blacklist = ['mlx4_core', 'mlx5_core'] + # generate a network config, blacklist picking mlx4_core and mlx5_core devs + return net.generate_fallback_config( + blacklist_drivers=blacklist, config_driver=True) @azure_ds_telemetry_reporter diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 2dda9925..2b22a879 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -162,8 +162,19 @@ MOCKPATH = 'cloudinit.sources.DataSourceAzure.' 
class TestParseNetworkConfig(CiTestCase): maxDiff = None + fallback_config = { + 'version': 1, + 'config': [{ + 'type': 'physical', 'name': 'eth0', + 'mac_address': '00:11:22:33:44:55', + 'params': {'driver': 'hv_netsvc'}, + 'subnets': [{'type': 'dhcp'}], + }] + } - def test_single_ipv4_nic_configuration(self): + @mock.patch('cloudinit.sources.DataSourceAzure.device_driver', + return_value=None) + def test_single_ipv4_nic_configuration(self, m_driver): """parse_network_config emits dhcp on single nic with ipv4""" expected = {'ethernets': { 'eth0': {'dhcp4': True, @@ -173,7 +184,9 @@ class TestParseNetworkConfig(CiTestCase): 'set-name': 'eth0'}}, 'version': 2} self.assertEqual(expected, dsaz.parse_network_config(NETWORK_METADATA)) - def test_increases_route_metric_for_non_primary_nics(self): + @mock.patch('cloudinit.sources.DataSourceAzure.device_driver', + return_value=None) + def test_increases_route_metric_for_non_primary_nics(self, m_driver): """parse_network_config increases route-metric for each nic""" expected = {'ethernets': { 'eth0': {'dhcp4': True, @@ -200,7 +213,9 @@ class TestParseNetworkConfig(CiTestCase): imds_data['network']['interface'].append(third_intf) self.assertEqual(expected, dsaz.parse_network_config(imds_data)) - def test_ipv4_and_ipv6_route_metrics_match_for_nics(self): + @mock.patch('cloudinit.sources.DataSourceAzure.device_driver', + return_value=None) + def test_ipv4_and_ipv6_route_metrics_match_for_nics(self, m_driver): """parse_network_config emits matching ipv4 and ipv6 route-metrics.""" expected = {'ethernets': { 'eth0': {'addresses': ['10.0.0.5/24', '2001:dead:beef::2/128'], @@ -242,7 +257,9 @@ class TestParseNetworkConfig(CiTestCase): imds_data['network']['interface'].append(third_intf) self.assertEqual(expected, dsaz.parse_network_config(imds_data)) - def test_ipv4_secondary_ips_will_be_static_addrs(self): + @mock.patch('cloudinit.sources.DataSourceAzure.device_driver', + return_value=None) + def 
test_ipv4_secondary_ips_will_be_static_addrs(self, m_driver): """parse_network_config emits primary ipv4 as dhcp others are static""" expected = {'ethernets': { 'eth0': {'addresses': ['10.0.0.5/24'], @@ -262,7 +279,9 @@ class TestParseNetworkConfig(CiTestCase): } self.assertEqual(expected, dsaz.parse_network_config(imds_data)) - def test_ipv6_secondary_ips_will_be_static_cidrs(self): + @mock.patch('cloudinit.sources.DataSourceAzure.device_driver', + return_value=None) + def test_ipv6_secondary_ips_will_be_static_cidrs(self, m_driver): """parse_network_config emits primary ipv6 as dhcp others are static""" expected = {'ethernets': { 'eth0': {'addresses': ['10.0.0.5/24', '2001:dead:beef::2/10'], @@ -301,6 +320,42 @@ class TestParseNetworkConfig(CiTestCase): }}, 'version': 2} self.assertEqual(expected, dsaz.parse_network_config(NETWORK_METADATA)) + @mock.patch('cloudinit.sources.DataSourceAzure.device_driver', + return_value=None) + @mock.patch('cloudinit.net.generate_fallback_config') + def test_parse_network_config_uses_fallback_cfg_when_no_network_metadata( + self, m_fallback_config, m_driver): + """parse_network_config generates fallback network config when the + IMDS instance metadata is corrupted/invalid, such as when + network metadata is not present. 
+ """ + imds_metadata_missing_network_metadata = copy.deepcopy( + NETWORK_METADATA) + del imds_metadata_missing_network_metadata['network'] + m_fallback_config.return_value = self.fallback_config + self.assertEqual( + self.fallback_config, + dsaz.parse_network_config( + imds_metadata_missing_network_metadata)) + + @mock.patch('cloudinit.sources.DataSourceAzure.device_driver', + return_value=None) + @mock.patch('cloudinit.net.generate_fallback_config') + def test_parse_network_config_uses_fallback_cfg_when_no_interface_metadata( + self, m_fallback_config, m_driver): + """parse_network_config generates fallback network config when the + IMDS instance metadata is corrupted/invalid, such as when + network interface metadata is not present. + """ + imds_metadata_missing_interface_metadata = copy.deepcopy( + NETWORK_METADATA) + del imds_metadata_missing_interface_metadata['network']['interface'] + m_fallback_config.return_value = self.fallback_config + self.assertEqual( + self.fallback_config, + dsaz.parse_network_config( + imds_metadata_missing_interface_metadata)) + class TestGetMetadataFromIMDS(HttprettyTestCase): @@ -783,7 +838,9 @@ scbus-1 on xpt0 bus 0 self.assertTrue(ret) self.assertEqual(data['agent_invoked'], cfg['agent_command']) - def test_network_config_set_from_imds(self): + @mock.patch('cloudinit.sources.DataSourceAzure.device_driver', + return_value=None) + def test_network_config_set_from_imds(self, m_driver): """Datasource.network_config returns IMDS network data.""" sys_cfg = {'datasource': {'Azure': {'apply_network_config': True}}} odata = {} @@ -801,7 +858,10 @@ scbus-1 on xpt0 bus 0 dsrc.get_data() self.assertEqual(expected_network_config, dsrc.network_config) - def test_network_config_set_from_imds_route_metric_for_secondary_nic(self): + @mock.patch('cloudinit.sources.DataSourceAzure.device_driver', + return_value=None) + def test_network_config_set_from_imds_route_metric_for_secondary_nic( + self, m_driver): """Datasource.network_config adds 
route-metric to secondary nics.""" sys_cfg = {'datasource': {'Azure': {'apply_network_config': True}}} odata = {} @@ -1157,8 +1217,10 @@ scbus-1 on xpt0 bus 0 self.assertEqual( [mock.call("/dev/cd0")], m_check_fbsd_cdrom.call_args_list) + @mock.patch('cloudinit.sources.DataSourceAzure.device_driver', + return_value=None) @mock.patch('cloudinit.net.generate_fallback_config') - def test_imds_network_config(self, mock_fallback): + def test_imds_network_config(self, mock_fallback, m_driver): """Network config is generated from IMDS network data when present.""" sys_cfg = {'datasource': {'Azure': {'apply_network_config': True}}} odata = {'HostName': "myhost", 'UserName': "myuser"} @@ -1245,55 +1307,9 @@ scbus-1 on xpt0 bus 0 netconfig = dsrc.network_config self.assertEqual(netconfig, fallback_config) - mock_fallback.assert_called_with(blacklist_drivers=['mlx4_core'], - config_driver=True) - - @mock.patch('cloudinit.net.get_interface_mac') - @mock.patch('cloudinit.net.get_devicelist') - @mock.patch('cloudinit.net.device_driver') - @mock.patch('cloudinit.net.generate_fallback_config') - def test_fallback_network_config_blacklist(self, mock_fallback, mock_dd, - mock_devlist, mock_get_mac): - """On absent network metadata, blacklist mlx from fallback config.""" - odata = {'HostName': "myhost", 'UserName': "myuser"} - data = {'ovfcontent': construct_valid_ovf_env(data=odata), - 'sys_cfg': {}} - - fallback_config = { - 'version': 1, - 'config': [{ - 'type': 'physical', 'name': 'eth0', - 'mac_address': '00:11:22:33:44:55', - 'params': {'driver': 'hv_netsvc'}, - 'subnets': [{'type': 'dhcp'}], - }] - } - blacklist_config = { - 'type': 'physical', - 'name': 'eth1', - 'mac_address': '00:11:22:33:44:55', - 'params': {'driver': 'mlx4_core'} - } - mock_fallback.return_value = fallback_config - - mock_devlist.return_value = ['eth0', 'eth1'] - mock_dd.side_effect = [ - 'hv_netsvc', # list composition, skipped - 'mlx4_core', # list composition, match - 'mlx4_core', # config get driver 
name - ] - mock_get_mac.return_value = '00:11:22:33:44:55' - - dsrc = self._get_ds(data) - # Represent empty response from network imds - self.m_get_metadata_from_imds.return_value = {} - ret = dsrc.get_data() - self.assertTrue(ret) - - netconfig = dsrc.network_config - expected_config = fallback_config - expected_config['config'].append(blacklist_config) - self.assertEqual(netconfig, expected_config) + mock_fallback.assert_called_with( + blacklist_drivers=['mlx4_core', 'mlx5_core'], + config_driver=True) @mock.patch(MOCKPATH + 'subp.subp') def test_get_hostname_with_no_args(self, m_subp): -- cgit v1.2.3 From 8ec8c3fc63a59b85888a0b52356b784314a1d4cc Mon Sep 17 00:00:00 2001 From: Anh Vo Date: Tue, 13 Oct 2020 15:42:54 -0400 Subject: net: add the ability to blacklist network interfaces based on driver during enumeration of physical network devices (#591) --- cloudinit/distros/networking.py | 6 +++- cloudinit/net/__init__.py | 35 ++++++++++++------- cloudinit/sources/DataSourceAzure.py | 36 +++++++++++++------- tests/unittests/test_datasource/test_azure.py | 49 +++++++++++++++++++-------- 4 files changed, 86 insertions(+), 40 deletions(-) (limited to 'tests/unittests/test_datasource/test_azure.py') diff --git a/cloudinit/distros/networking.py b/cloudinit/distros/networking.py index 10ed249d..e407fa29 100644 --- a/cloudinit/distros/networking.py +++ b/cloudinit/distros/networking.py @@ -22,6 +22,9 @@ class Networking(metaclass=abc.ABCMeta): Hierarchy" in HACKING.rst for full details. 
""" + def __init__(self): + self.blacklist_drivers = None + def _get_current_rename_info(self) -> dict: return net._get_current_rename_info() @@ -68,7 +71,8 @@ class Networking(metaclass=abc.ABCMeta): return net.get_interfaces() def get_interfaces_by_mac(self) -> dict: - return net.get_interfaces_by_mac() + return net.get_interfaces_by_mac( + blacklist_drivers=self.blacklist_drivers) def get_master(self, devname: DeviceName): return net.get_master(devname) diff --git a/cloudinit/net/__init__.py b/cloudinit/net/__init__.py index e233149a..75e79ca8 100644 --- a/cloudinit/net/__init__.py +++ b/cloudinit/net/__init__.py @@ -746,18 +746,22 @@ def get_ib_interface_hwaddr(ifname, ethernet_format): return mac -def get_interfaces_by_mac(): +def get_interfaces_by_mac(blacklist_drivers=None) -> dict: if util.is_FreeBSD(): - return get_interfaces_by_mac_on_freebsd() + return get_interfaces_by_mac_on_freebsd( + blacklist_drivers=blacklist_drivers) elif util.is_NetBSD(): - return get_interfaces_by_mac_on_netbsd() + return get_interfaces_by_mac_on_netbsd( + blacklist_drivers=blacklist_drivers) elif util.is_OpenBSD(): - return get_interfaces_by_mac_on_openbsd() + return get_interfaces_by_mac_on_openbsd( + blacklist_drivers=blacklist_drivers) else: - return get_interfaces_by_mac_on_linux() + return get_interfaces_by_mac_on_linux( + blacklist_drivers=blacklist_drivers) -def get_interfaces_by_mac_on_freebsd(): +def get_interfaces_by_mac_on_freebsd(blacklist_drivers=None) -> dict(): (out, _) = subp.subp(['ifconfig', '-a', 'ether']) # flatten each interface block in a single line @@ -784,7 +788,7 @@ def get_interfaces_by_mac_on_freebsd(): return results -def get_interfaces_by_mac_on_netbsd(): +def get_interfaces_by_mac_on_netbsd(blacklist_drivers=None) -> dict(): ret = {} re_field_match = ( r"(?P\w+).*address:\s" @@ -800,7 +804,7 @@ def get_interfaces_by_mac_on_netbsd(): return ret -def get_interfaces_by_mac_on_openbsd(): +def get_interfaces_by_mac_on_openbsd(blacklist_drivers=None) -> 
dict(): ret = {} re_field_match = ( r"(?P\w+).*lladdr\s" @@ -815,12 +819,13 @@ def get_interfaces_by_mac_on_openbsd(): return ret -def get_interfaces_by_mac_on_linux(): +def get_interfaces_by_mac_on_linux(blacklist_drivers=None) -> dict: """Build a dictionary of tuples {mac: name}. Bridges and any devices that have a 'stolen' mac are excluded.""" ret = {} - for name, mac, _driver, _devid in get_interfaces(): + for name, mac, _driver, _devid in get_interfaces( + blacklist_drivers=blacklist_drivers): if mac in ret: raise RuntimeError( "duplicate mac found! both '%s' and '%s' have mac '%s'" % @@ -838,11 +843,13 @@ def get_interfaces_by_mac_on_linux(): return ret -def get_interfaces(): +def get_interfaces(blacklist_drivers=None) -> list: """Return list of interface tuples (name, mac, driver, device_id) Bridges and any devices that have a 'stolen' mac are excluded.""" ret = [] + if blacklist_drivers is None: + blacklist_drivers = [] devs = get_devicelist() # 16 somewhat arbitrarily chosen. Normally a mac is 6 '00:' tokens. zero_mac = ':'.join(('00',) * 16) @@ -866,7 +873,11 @@ def get_interfaces(): # skip nics that have no mac (00:00....) if name != 'lo' and mac == zero_mac[:len(mac)]: continue - ret.append((name, mac, device_driver(name), device_devid(name))) + # skip nics that have drivers blacklisted + driver = device_driver(name) + if driver in blacklist_drivers: + continue + ret.append((name, mac, driver, device_devid(name))) return ret diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 773c60d7..fc32f8b1 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -83,6 +83,25 @@ UBUNTU_EXTENDED_NETWORK_SCRIPTS = [ '/run/network/interfaces.ephemeral.d', ] +# This list is used to blacklist devices that will be considered +# for renaming or fallback interfaces. 
+# +# On Azure network devices using these drivers are automatically +# configured by the platform and should not be configured by +# cloud-init's network configuration. +# +# Note: +# Azure Dv4 and Ev4 series VMs always have mlx5 hardware. +# https://docs.microsoft.com/en-us/azure/virtual-machines/dv4-dsv4-series +# https://docs.microsoft.com/en-us/azure/virtual-machines/ev4-esv4-series +# Earlier D and E series VMs (such as Dv2, Dv3, and Ev3 series VMs) +# can have either mlx4 or mlx5 hardware, with the older series VMs +# having a higher chance of coming with mlx4 hardware. +# https://docs.microsoft.com/en-us/azure/virtual-machines/dv2-dsv2-series +# https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series +# https://docs.microsoft.com/en-us/azure/virtual-machines/ev3-esv3-series +BLACKLIST_DRIVERS = ['mlx4_core', 'mlx5_core'] + def find_storvscid_from_sysctl_pnpinfo(sysctl_out, deviceid): # extract the 'X' from dev.storvsc.X. if deviceid matches @@ -529,6 +548,8 @@ class DataSourceAzure(sources.DataSource): except Exception as e: LOG.warning("Failed to get system information: %s", e) + self.distro.networking.blacklist_drivers = BLACKLIST_DRIVERS + try: crawled_data = util.log_time( logfunc=LOG.debug, msg='Crawl of metadata service', @@ -1468,23 +1489,12 @@ def _generate_network_config_from_imds_metadata(imds_metadata) -> dict: @azure_ds_telemetry_reporter def _generate_network_config_from_fallback_config() -> dict: - """Generate fallback network config excluding mlx4_core & mlx5_core devices. + """Generate fallback network config excluding blacklisted devices. @return: Dictionary containing network version 2 standard configuration. """ - # Azure Dv4 and Ev4 series VMs always have mlx5 hardware. 
- # https://docs.microsoft.com/en-us/azure/virtual-machines/dv4-dsv4-series - # https://docs.microsoft.com/en-us/azure/virtual-machines/ev4-esv4-series - # Earlier D and E series VMs (such as Dv2, Dv3, and Ev3 series VMs) - # can have either mlx4 or mlx5 hardware, with the older series VMs - # having a higher chance of coming with mlx4 hardware. - # https://docs.microsoft.com/en-us/azure/virtual-machines/dv2-dsv2-series - # https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series - # https://docs.microsoft.com/en-us/azure/virtual-machines/ev3-esv3-series - blacklist = ['mlx4_core', 'mlx5_core'] - # generate a network config, blacklist picking mlx4_core and mlx5_core devs return net.generate_fallback_config( - blacklist_drivers=blacklist, config_driver=True) + blacklist_drivers=BLACKLIST_DRIVERS, config_driver=True) @azure_ds_telemetry_reporter diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 2b22a879..3b9456f7 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -526,7 +526,7 @@ scbus-1 on xpt0 bus 0 ]) return dsaz - def _get_ds(self, data, agent_command=None, distro=None, + def _get_ds(self, data, agent_command=None, distro='ubuntu', apply_network=None): def dsdevs(): @@ -576,7 +576,7 @@ scbus-1 on xpt0 bus 0 side_effect=_wait_for_files)), ]) - if distro is not None: + if isinstance(distro, str): distro_cls = distros.fetch(distro) distro = distro_cls(distro, data.get('sys_cfg', {}), self.paths) dsrc = dsaz.DataSourceAzure( @@ -638,7 +638,7 @@ scbus-1 on xpt0 bus 0 # Return a non-matching asset tag value m_is_platform_viable.return_value = False dsrc = dsaz.DataSourceAzure( - {}, distro=None, paths=self.paths) + {}, distro=mock.Mock(), paths=self.paths) self.assertFalse(dsrc.get_data()) m_is_platform_viable.assert_called_with(dsrc.seed_dir) @@ -1311,6 +1311,28 @@ scbus-1 on xpt0 bus 0 blacklist_drivers=['mlx4_core', 
'mlx5_core'], config_driver=True) + @mock.patch(MOCKPATH + 'net.get_interfaces') + @mock.patch(MOCKPATH + 'util.is_FreeBSD') + def test_blacklist_through_distro( + self, m_is_freebsd, m_net_get_interfaces): + """Verify Azure DS updates blacklist drivers in the distro's + networking object.""" + odata = {'HostName': "myhost", 'UserName': "myuser"} + data = {'ovfcontent': construct_valid_ovf_env(data=odata), + 'sys_cfg': {}} + + distro_cls = distros.fetch('ubuntu') + distro = distro_cls('ubuntu', {}, self.paths) + dsrc = self._get_ds(data, distro=distro) + dsrc.get_data() + self.assertEqual(distro.networking.blacklist_drivers, + dsaz.BLACKLIST_DRIVERS) + + m_is_freebsd.return_value = False + distro.networking.get_interfaces_by_mac() + m_net_get_interfaces.assert_called_with( + blacklist_drivers=dsaz.BLACKLIST_DRIVERS) + @mock.patch(MOCKPATH + 'subp.subp') def test_get_hostname_with_no_args(self, m_subp): dsaz.get_hostname() @@ -1421,8 +1443,7 @@ class TestAzureBounce(CiTestCase): if ovfcontent is not None: populate_dir(os.path.join(self.paths.seed_dir, "azure"), {'ovf-env.xml': ovfcontent}) - dsrc = dsaz.DataSourceAzure( - {}, distro=None, paths=self.paths) + dsrc = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) if agent_command is not None: dsrc.ds_cfg['agent_command'] = agent_command return dsrc @@ -1906,7 +1927,7 @@ class TestClearCachedData(CiTestCase): tmp = self.tmp_dir() paths = helpers.Paths( {'cloud_dir': tmp, 'run_dir': tmp}) - dsrc = dsaz.DataSourceAzure({}, distro=None, paths=paths) + dsrc = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=paths) clean_values = [dsrc.metadata, dsrc.userdata, dsrc._metadata_imds] dsrc.metadata = 'md' dsrc.userdata = 'ud' @@ -1970,7 +1991,7 @@ class TestPreprovisioningShouldReprovision(CiTestCase): """The _should_reprovision method should return true with config flag present.""" isfile.return_value = False - dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa = dsaz.DataSourceAzure({}, 
distro=mock.Mock(), paths=self.paths) self.assertTrue(dsa._should_reprovision( (None, None, {'PreprovisionedVm': True}, None))) @@ -1978,7 +1999,7 @@ class TestPreprovisioningShouldReprovision(CiTestCase): """The _should_reprovision method should return True if the sentinal exists.""" isfile.return_value = True - dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) self.assertTrue(dsa._should_reprovision( (None, None, {'preprovisionedvm': False}, None))) @@ -1986,7 +2007,7 @@ class TestPreprovisioningShouldReprovision(CiTestCase): """The _should_reprovision method should return False if config and sentinal are not present.""" isfile.return_value = False - dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) self.assertFalse(dsa._should_reprovision((None, None, {}, None))) @mock.patch(MOCKPATH + 'DataSourceAzure._poll_imds') @@ -1997,7 +2018,7 @@ class TestPreprovisioningShouldReprovision(CiTestCase): username = "myuser" odata = {'HostName': hostname, 'UserName': username} _poll_imds.return_value = construct_valid_ovf_env(data=odata) - dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) dsa._reprovision() _poll_imds.assert_called_with() @@ -2051,7 +2072,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): fake_resp.side_effect = fake_timeout_once - dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file): dsa._poll_imds() self.assertEqual(report_ready_func.call_count, 1) @@ -2071,7 +2092,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', 'unknown-245': '624c3620'}] m_media_switch.return_value = None - dsa = 
dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file): dsa._poll_imds() self.assertEqual(report_ready_func.call_count, 0) @@ -2109,7 +2130,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): full_url = url.format(host) fake_resp.return_value = mock.MagicMock(status_code=200, text="ovf", content="ovf") - dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) self.assertTrue(len(dsa._poll_imds()) > 0) self.assertEqual(fake_resp.call_args_list, [mock.call(allow_redirects=True, @@ -2146,7 +2167,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): content = construct_valid_ovf_env(data=odata) fake_resp.return_value = mock.MagicMock(status_code=200, text=content, content=content) - dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) md, _ud, cfg, _d = dsa._reprovision() self.assertEqual(md['local-hostname'], hostname) self.assertEqual(cfg['system_info']['default_user']['name'], username) -- cgit v1.2.3 From 8766784f4b1d1f9f6a9094e1268e4accb811ea7f Mon Sep 17 00:00:00 2001 From: Johnson Shi Date: Fri, 16 Oct 2020 08:54:38 -0700 Subject: DataSourceAzure: write marker file after report ready in preprovisioning (#590) DataSourceAzure previously writes the preprovisioning reported ready marker file before it goes through the report ready workflow. On certain VM instances, the marker file is successfully written but then reporting ready fails. Upon rare VM reboots by the platform, cloud-init sees that the report ready marker file already exists. The existence of this marker file tells cloud-init not to report ready again (because it mistakenly assumes that it already reported ready in preprovisioning). 
In this scenario, cloud-init instead erroneously takes the reprovisioning workflow instead of reporting ready again. --- cloudinit/sources/DataSourceAzure.py | 23 +++++- tests/unittests/test_datasource/test_azure.py | 86 +++++++++++++++++----- .../unittests/test_datasource/test_azure_helper.py | 3 +- 3 files changed, 90 insertions(+), 22 deletions(-) (limited to 'tests/unittests/test_datasource/test_azure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 8858fbd5..70e32f46 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -720,12 +720,23 @@ class DataSourceAzure(sources.DataSource): self._ephemeral_dhcp_ctx.clean_network() break + report_ready_succeeded = self._report_ready(lease=lease) + if not report_ready_succeeded: + msg = ('Failed reporting ready while in ' + 'the preprovisioning pool.') + report_diagnostic_event(msg, logger_func=LOG.error) + self._ephemeral_dhcp_ctx.clean_network() + raise sources.InvalidMetaDataException(msg) + path = REPORTED_READY_MARKER_FILE LOG.info( "Creating a marker file to report ready: %s", path) util.write_file(path, "{pid}: {time}\n".format( pid=os.getpid(), time=time())) - self._report_ready(lease=lease) + report_diagnostic_event( + 'Successfully created reported ready marker file ' + 'while in the preprovisioning pool.', + logger_func=LOG.debug) report_ready = False with events.ReportEventStack( @@ -773,14 +784,20 @@ class DataSourceAzure(sources.DataSource): return return_val @azure_ds_telemetry_reporter - def _report_ready(self, lease): - """Tells the fabric provisioning has completed """ + def _report_ready(self, lease: dict) -> bool: + """Tells the fabric provisioning has completed. + + @param lease: dhcp lease to use for sending the ready signal. + @return: The success status of sending the ready signal. 
+ """ try: get_metadata_from_fabric(None, lease['unknown-245']) + return True except Exception as e: report_diagnostic_event( "Error communicating with Azure fabric; You may experience " "connectivity issues: %s" % e, logger_func=LOG.warning) + return False def _should_reprovision(self, ret): """Whether or not we should poll IMDS for reprovisioning data. diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 3b9456f7..56c1cf18 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -1161,6 +1161,19 @@ scbus-1 on xpt0 bus 0 dsrc = self._get_ds({'ovfcontent': xml}) dsrc.get_data() + def test_dsaz_report_ready_returns_true_when_report_succeeds( + self): + dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) + dsrc.ds_cfg['agent_command'] = '__builtin__' + self.assertTrue(dsrc._report_ready(lease=mock.MagicMock())) + + def test_dsaz_report_ready_returns_false_and_does_not_propagate_exc( + self): + dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) + dsrc.ds_cfg['agent_command'] = '__builtin__' + self.get_metadata_from_fabric.side_effect = Exception + self.assertFalse(dsrc._report_ready(lease=mock.MagicMock())) + def test_exception_fetching_fabric_data_doesnt_propagate(self): """Errors communicating with fabric should warn, but return True.""" dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) @@ -2041,7 +2054,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): @mock.patch('time.sleep', mock.MagicMock()) @mock.patch(MOCKPATH + 'EphemeralDHCPv4') def test_poll_imds_re_dhcp_on_timeout(self, m_dhcpv4, report_ready_func, - fake_resp, m_media_switch, m_dhcp, + m_request, m_media_switch, m_dhcp, m_net): """The poll_imds will retry DHCP on IMDS timeout.""" report_file = self.tmp_path('report_marker', self.tmp) @@ -2070,7 +2083,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): # Third try should succeed and stop retries or 
redhcp return mock.MagicMock(status_code=200, text="good", content="good") - fake_resp.side_effect = fake_timeout_once + m_request.side_effect = fake_timeout_once dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file): @@ -2080,11 +2093,10 @@ class TestPreprovisioningPollIMDS(CiTestCase): self.assertEqual(3, m_dhcpv4.call_count, 'Expected 3 DHCP calls') self.assertEqual(4, self.tries, 'Expected 4 total reads from IMDS') - def test_poll_imds_report_ready_false(self, - report_ready_func, fake_resp, - m_media_switch, m_dhcp, m_net): - """The poll_imds should not call reporting ready - when flag is false""" + def test_does_not_poll_imds_report_ready_when_marker_file_exists( + self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net): + """poll_imds should not call report ready when the reported ready + marker file exists""" report_file = self.tmp_path('report_marker', self.tmp) write_file(report_file, content='dont run report_ready :)') m_dhcp.return_value = [{ @@ -2095,11 +2107,49 @@ class TestPreprovisioningPollIMDS(CiTestCase): dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file): dsa._poll_imds() - self.assertEqual(report_ready_func.call_count, 0) + self.assertEqual(m_report_ready.call_count, 0) + + def test_poll_imds_report_ready_success_writes_marker_file( + self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net): + """poll_imds should write the report_ready marker file if + reporting ready succeeds""" + report_file = self.tmp_path('report_marker', self.tmp) + m_dhcp.return_value = [{ + 'interface': 'eth9', 'fixed-address': '192.168.2.9', + 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', + 'unknown-245': '624c3620'}] + m_media_switch.return_value = None + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + self.assertFalse(os.path.exists(report_file)) + with 
mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file): + dsa._poll_imds() + self.assertEqual(m_report_ready.call_count, 1) + self.assertTrue(os.path.exists(report_file)) + + def test_poll_imds_report_ready_failure_raises_exc_and_doesnt_write_marker( + self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net): + """poll_imds should write the report_ready marker file if + reporting ready succeeds""" + report_file = self.tmp_path('report_marker', self.tmp) + m_dhcp.return_value = [{ + 'interface': 'eth9', 'fixed-address': '192.168.2.9', + 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', + 'unknown-245': '624c3620'}] + m_media_switch.return_value = None + m_report_ready.return_value = False + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + self.assertFalse(os.path.exists(report_file)) + with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file): + self.assertRaises( + InvalidMetaDataException, + dsa._poll_imds) + self.assertEqual(m_report_ready.call_count, 1) + self.assertFalse(os.path.exists(report_file)) -@mock.patch(MOCKPATH + 'subp.subp') -@mock.patch(MOCKPATH + 'util.write_file') +@mock.patch(MOCKPATH + 'DataSourceAzure._report_ready', mock.MagicMock()) +@mock.patch(MOCKPATH + 'subp.subp', mock.MagicMock()) +@mock.patch(MOCKPATH + 'util.write_file', mock.MagicMock()) @mock.patch(MOCKPATH + 'util.is_FreeBSD') @mock.patch('cloudinit.sources.helpers.netlink.' 
'wait_for_media_disconnect_connect') @@ -2115,10 +2165,10 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): self.paths = helpers.Paths({'cloud_dir': tmp}) dsaz.BUILTIN_DS_CONFIG['data_dir'] = self.waagent_d - def test_poll_imds_returns_ovf_env(self, fake_resp, + def test_poll_imds_returns_ovf_env(self, m_request, m_dhcp, m_net, m_media_switch, - m_is_bsd, write_f, subp): + m_is_bsd): """The _poll_imds method should return the ovf_env.xml.""" m_is_bsd.return_value = False m_media_switch.return_value = None @@ -2128,11 +2178,11 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): url = 'http://{0}/metadata/reprovisiondata?api-version=2017-04-02' host = "169.254.169.254" full_url = url.format(host) - fake_resp.return_value = mock.MagicMock(status_code=200, text="ovf", + m_request.return_value = mock.MagicMock(status_code=200, text="ovf", content="ovf") dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) self.assertTrue(len(dsa._poll_imds()) > 0) - self.assertEqual(fake_resp.call_args_list, + self.assertEqual(m_request.call_args_list, [mock.call(allow_redirects=True, headers={'Metadata': 'true', 'User-Agent': @@ -2147,10 +2197,10 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): static_routes=None) self.assertEqual(m_net.call_count, 2) - def test__reprovision_calls__poll_imds(self, fake_resp, + def test__reprovision_calls__poll_imds(self, m_request, m_dhcp, m_net, m_media_switch, - m_is_bsd, write_f, subp): + m_is_bsd): """The _reprovision method should call poll IMDS.""" m_is_bsd.return_value = False m_media_switch.return_value = None @@ -2165,7 +2215,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): username = "myuser" odata = {'HostName': hostname, 'UserName': username} content = construct_valid_ovf_env(data=odata) - fake_resp.return_value = mock.MagicMock(status_code=200, text=content, + m_request.return_value = mock.MagicMock(status_code=200, text=content, content=content) dsa = dsaz.DataSourceAzure({}, 
distro=mock.Mock(), paths=self.paths) md, _ud, cfg, _d = dsa._reprovision() @@ -2182,7 +2232,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS, url=full_url ), - fake_resp.call_args_list) + m_request.call_args_list) self.assertEqual(m_dhcp.call_count, 2) m_net.assert_any_call( broadcast='192.168.2.255', interface='eth9', ip='192.168.2.9', diff --git a/tests/unittests/test_datasource/test_azure_helper.py b/tests/unittests/test_datasource/test_azure_helper.py index 5c31b8be..6e004e34 100644 --- a/tests/unittests/test_datasource/test_azure_helper.py +++ b/tests/unittests/test_datasource/test_azure_helper.py @@ -742,7 +742,8 @@ class TestGetMetadataGoalStateXMLAndReportReadyToFabric(CiTestCase): self.assertEqual(1, shim.return_value.clean_up.call_count) @mock.patch.object(azure_helper, 'WALinuxAgentShim') - def test_failure_in_registration_calls_clean_up(self, shim): + def test_failure_in_registration_propagates_exc_and_calls_clean_up( + self, shim): shim.return_value.register_with_azure_and_fetch_data.side_effect = ( SentinelException) self.assertRaises(SentinelException, -- cgit v1.2.3 From 0af1ff1eaf593c325b4f53181a572110eb016c50 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Mon, 2 Nov 2020 15:41:11 -0500 Subject: cloudinit: move dmi functions out of util (#622) This just separates the reading of dmi values into its own file. Some things of note: * left import of util in dmi.py only for 'is_container' It'd be good if is_container was not in util. * just the use of 'util.is_x86' to dmi.py * open() is used directly rather than load_file. 
--- cloudinit/dmi.py | 131 ++++++++++++++++++++++ cloudinit/sources/DataSourceAliYun.py | 4 +- cloudinit/sources/DataSourceAltCloud.py | 3 +- cloudinit/sources/DataSourceAzure.py | 5 +- cloudinit/sources/DataSourceCloudSigma.py | 4 +- cloudinit/sources/DataSourceEc2.py | 9 +- cloudinit/sources/DataSourceExoscale.py | 3 +- cloudinit/sources/DataSourceGCE.py | 5 +- cloudinit/sources/DataSourceHetzner.py | 5 +- cloudinit/sources/DataSourceNoCloud.py | 3 +- cloudinit/sources/DataSourceOVF.py | 5 +- cloudinit/sources/DataSourceOpenStack.py | 5 +- cloudinit/sources/DataSourceOracle.py | 5 +- cloudinit/sources/DataSourceScaleway.py | 3 +- cloudinit/sources/DataSourceSmartOS.py | 3 +- cloudinit/sources/__init__.py | 3 +- cloudinit/sources/helpers/digitalocean.py | 5 +- cloudinit/sources/tests/test_oracle.py | 6 +- cloudinit/tests/test_dmi.py | 131 ++++++++++++++++++++++ cloudinit/util.py | 119 -------------------- tests/unittests/test_datasource/test_aliyun.py | 6 +- tests/unittests/test_datasource/test_altcloud.py | 21 ++-- tests/unittests/test_datasource/test_azure.py | 10 +- tests/unittests/test_datasource/test_nocloud.py | 3 +- tests/unittests/test_datasource/test_openstack.py | 16 +-- tests/unittests/test_datasource/test_ovf.py | 16 +-- tests/unittests/test_datasource/test_scaleway.py | 8 +- tests/unittests/test_util.py | 123 -------------------- 28 files changed, 348 insertions(+), 312 deletions(-) create mode 100644 cloudinit/dmi.py create mode 100644 cloudinit/tests/test_dmi.py (limited to 'tests/unittests/test_datasource/test_azure.py') diff --git a/cloudinit/dmi.py b/cloudinit/dmi.py new file mode 100644 index 00000000..96e0e423 --- /dev/null +++ b/cloudinit/dmi.py @@ -0,0 +1,131 @@ +# This file is part of cloud-init. See LICENSE file for license information. 
+from cloudinit import log as logging +from cloudinit import subp +from cloudinit.util import is_container + +import os + +LOG = logging.getLogger(__name__) + +# Path for DMI Data +DMI_SYS_PATH = "/sys/class/dmi/id" + +# dmidecode and /sys/class/dmi/id/* use different names for the same value, +# this allows us to refer to them by one canonical name +DMIDECODE_TO_DMI_SYS_MAPPING = { + 'baseboard-asset-tag': 'board_asset_tag', + 'baseboard-manufacturer': 'board_vendor', + 'baseboard-product-name': 'board_name', + 'baseboard-serial-number': 'board_serial', + 'baseboard-version': 'board_version', + 'bios-release-date': 'bios_date', + 'bios-vendor': 'bios_vendor', + 'bios-version': 'bios_version', + 'chassis-asset-tag': 'chassis_asset_tag', + 'chassis-manufacturer': 'chassis_vendor', + 'chassis-serial-number': 'chassis_serial', + 'chassis-version': 'chassis_version', + 'system-manufacturer': 'sys_vendor', + 'system-product-name': 'product_name', + 'system-serial-number': 'product_serial', + 'system-uuid': 'product_uuid', + 'system-version': 'product_version', +} + + +def _read_dmi_syspath(key): + """ + Reads dmi data with from /sys/class/dmi/id + """ + if key not in DMIDECODE_TO_DMI_SYS_MAPPING: + return None + mapped_key = DMIDECODE_TO_DMI_SYS_MAPPING[key] + dmi_key_path = "{0}/{1}".format(DMI_SYS_PATH, mapped_key) + + LOG.debug("querying dmi data %s", dmi_key_path) + if not os.path.exists(dmi_key_path): + LOG.debug("did not find %s", dmi_key_path) + return None + + try: + with open(dmi_key_path, "rb") as fp: + key_data = fp.read() + except PermissionError: + LOG.debug("Could not read %s", dmi_key_path) + return None + + # uninitialized dmi values show as all \xff and /sys appends a '\n'. + # in that event, return empty string. 
+ if key_data == b'\xff' * (len(key_data) - 1) + b'\n': + key_data = b"" + + try: + return key_data.decode('utf8').strip() + except UnicodeDecodeError as e: + LOG.error("utf-8 decode of content (%s) in %s failed: %s", + dmi_key_path, key_data, e) + + return None + + +def _call_dmidecode(key, dmidecode_path): + """ + Calls out to dmidecode to get the data out. This is mostly for supporting + OS's without /sys/class/dmi/id support. + """ + try: + cmd = [dmidecode_path, "--string", key] + (result, _err) = subp.subp(cmd) + result = result.strip() + LOG.debug("dmidecode returned '%s' for '%s'", result, key) + if result.replace(".", "") == "": + return "" + return result + except (IOError, OSError) as e: + LOG.debug('failed dmidecode cmd: %s\n%s', cmd, e) + return None + + +def read_dmi_data(key): + """ + Wrapper for reading DMI data. + + If running in a container return None. This is because DMI data is + assumed to be not useful in a container as it does not represent the + container but rather the host. + + This will do the following (returning the first that produces a + result): + 1) Use a mapping to translate `key` from dmidecode naming to + sysfs naming and look in /sys/class/dmi/... for a value. + 2) Use `key` as a sysfs key directly and look in /sys/class/dmi/... + 3) Fall-back to passing `key` to `dmidecode --string`. + + If all of the above fail to find a value, None will be returned. 
+ """ + + if is_container(): + return None + + syspath_value = _read_dmi_syspath(key) + if syspath_value is not None: + return syspath_value + + def is_x86(arch): + return (arch == 'x86_64' or (arch[0] == 'i' and arch[2:] == '86')) + + # running dmidecode can be problematic on some arches (LP: #1243287) + uname_arch = os.uname()[4] + if not (is_x86(uname_arch) or uname_arch in ('aarch64', 'amd64')): + LOG.debug("dmidata is not supported on %s", uname_arch) + return None + + dmidecode_path = subp.which('dmidecode') + if dmidecode_path: + return _call_dmidecode(key, dmidecode_path) + + LOG.warning("did not find either path %s or dmidecode command", + DMI_SYS_PATH) + return None + +# vi: ts=4 expandtab diff --git a/cloudinit/sources/DataSourceAliYun.py b/cloudinit/sources/DataSourceAliYun.py index 45cc9f00..09052873 100644 --- a/cloudinit/sources/DataSourceAliYun.py +++ b/cloudinit/sources/DataSourceAliYun.py @@ -1,8 +1,8 @@ # This file is part of cloud-init. See LICENSE file for license information. 
+from cloudinit import dmi from cloudinit import sources from cloudinit.sources import DataSourceEc2 as EC2 -from cloudinit import util ALIYUN_PRODUCT = "Alibaba Cloud ECS" @@ -30,7 +30,7 @@ class DataSourceAliYun(EC2.DataSourceEc2): def _is_aliyun(): - return util.read_dmi_data('system-product-name') == ALIYUN_PRODUCT + return dmi.read_dmi_data('system-product-name') == ALIYUN_PRODUCT def parse_public_keys(public_keys): diff --git a/cloudinit/sources/DataSourceAltCloud.py b/cloudinit/sources/DataSourceAltCloud.py index ac3ecc3d..cd93412a 100644 --- a/cloudinit/sources/DataSourceAltCloud.py +++ b/cloudinit/sources/DataSourceAltCloud.py @@ -16,6 +16,7 @@ import errno import os import os.path +from cloudinit import dmi from cloudinit import log as logging from cloudinit import sources from cloudinit import subp @@ -109,7 +110,7 @@ class DataSourceAltCloud(sources.DataSource): CLOUD_INFO_FILE) return 'UNKNOWN' return cloud_type - system_name = util.read_dmi_data("system-product-name") + system_name = dmi.read_dmi_data("system-product-name") if not system_name: return 'UNKNOWN' diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 70e32f46..fa3e0a2b 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -15,6 +15,7 @@ from time import time from xml.dom import minidom import xml.etree.ElementTree as ET +from cloudinit import dmi from cloudinit import log as logging from cloudinit import net from cloudinit.event import EventType @@ -630,7 +631,7 @@ class DataSourceAzure(sources.DataSource): def _iid(self, previous=None): prev_iid_path = os.path.join( self.paths.get_cpath('data'), 'instance-id') - iid = util.read_dmi_data('system-uuid') + iid = dmi.read_dmi_data('system-uuid') if os.path.exists(prev_iid_path): previous = util.load_file(prev_iid_path).strip() if is_byte_swapped(previous, iid): @@ -1630,7 +1631,7 @@ def _is_platform_viable(seed_dir): description="found azure asset tag", 
parent=azure_ds_reporter ) as evt: - asset_tag = util.read_dmi_data('chassis-asset-tag') + asset_tag = dmi.read_dmi_data('chassis-asset-tag') if asset_tag == AZURE_CHASSIS_ASSET_TAG: return True msg = "Non-Azure DMI asset tag '%s' discovered." % asset_tag diff --git a/cloudinit/sources/DataSourceCloudSigma.py b/cloudinit/sources/DataSourceCloudSigma.py index df88f677..f63baf74 100644 --- a/cloudinit/sources/DataSourceCloudSigma.py +++ b/cloudinit/sources/DataSourceCloudSigma.py @@ -9,9 +9,9 @@ import re from cloudinit.cs_utils import Cepko, SERIAL_PORT +from cloudinit import dmi from cloudinit import log as logging from cloudinit import sources -from cloudinit import util LOG = logging.getLogger(__name__) @@ -38,7 +38,7 @@ class DataSourceCloudSigma(sources.DataSource): """ LOG.debug("determining hypervisor product name via dmi data") - sys_product_name = util.read_dmi_data("system-product-name") + sys_product_name = dmi.read_dmi_data("system-product-name") if not sys_product_name: LOG.debug("system-product-name not available in dmi data") return False diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 1d09c12a..1930a509 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -11,6 +11,7 @@ import os import time +from cloudinit import dmi from cloudinit import ec2_utils as ec2 from cloudinit import log as logging from cloudinit import net @@ -699,26 +700,26 @@ def _collect_platform_data(): uuid = util.load_file("/sys/hypervisor/uuid").strip() data['uuid_source'] = 'hypervisor' except Exception: - uuid = util.read_dmi_data('system-uuid') + uuid = dmi.read_dmi_data('system-uuid') data['uuid_source'] = 'dmi' if uuid is None: uuid = '' data['uuid'] = uuid.lower() - serial = util.read_dmi_data('system-serial-number') + serial = dmi.read_dmi_data('system-serial-number') if serial is None: serial = '' data['serial'] = serial.lower() - asset_tag = util.read_dmi_data('chassis-asset-tag') + asset_tag 
= dmi.read_dmi_data('chassis-asset-tag') if asset_tag is None: asset_tag = '' data['asset_tag'] = asset_tag.lower() - vendor = util.read_dmi_data('system-manufacturer') + vendor = dmi.read_dmi_data('system-manufacturer') data['vendor'] = (vendor if vendor else '').lower() return data diff --git a/cloudinit/sources/DataSourceExoscale.py b/cloudinit/sources/DataSourceExoscale.py index d59aefd1..adee6d79 100644 --- a/cloudinit/sources/DataSourceExoscale.py +++ b/cloudinit/sources/DataSourceExoscale.py @@ -3,6 +3,7 @@ # # This file is part of cloud-init. See LICENSE file for license information. +from cloudinit import dmi from cloudinit import ec2_utils as ec2 from cloudinit import log as logging from cloudinit import sources @@ -135,7 +136,7 @@ class DataSourceExoscale(sources.DataSource): return self.extra_config def _is_platform_viable(self): - return util.read_dmi_data('system-product-name').startswith( + return dmi.read_dmi_data('system-product-name').startswith( EXOSCALE_DMI_NAME) diff --git a/cloudinit/sources/DataSourceGCE.py b/cloudinit/sources/DataSourceGCE.py index 0ec5f6ec..746caddb 100644 --- a/cloudinit/sources/DataSourceGCE.py +++ b/cloudinit/sources/DataSourceGCE.py @@ -7,6 +7,7 @@ import json from base64 import b64decode +from cloudinit import dmi from cloudinit.distros import ug_util from cloudinit import log as logging from cloudinit import sources @@ -248,12 +249,12 @@ def read_md(address=None, platform_check=True): def platform_reports_gce(): - pname = util.read_dmi_data('system-product-name') or "N/A" + pname = dmi.read_dmi_data('system-product-name') or "N/A" if pname == "Google Compute Engine": return True # system-product-name is not always guaranteed (LP: #1674861) - serial = util.read_dmi_data('system-serial-number') or "N/A" + serial = dmi.read_dmi_data('system-serial-number') or "N/A" if serial.startswith("GoogleCloud-"): return True diff --git a/cloudinit/sources/DataSourceHetzner.py b/cloudinit/sources/DataSourceHetzner.py index 
8e4d4b69..c7c88dd7 100644 --- a/cloudinit/sources/DataSourceHetzner.py +++ b/cloudinit/sources/DataSourceHetzner.py @@ -6,6 +6,7 @@ """Hetzner Cloud API Documentation https://docs.hetzner.cloud/""" +from cloudinit import dmi from cloudinit import log as logging from cloudinit import net as cloudnet from cloudinit import sources @@ -113,11 +114,11 @@ class DataSourceHetzner(sources.DataSource): def get_hcloud_data(): - vendor_name = util.read_dmi_data('system-manufacturer') + vendor_name = dmi.read_dmi_data('system-manufacturer') if vendor_name != "Hetzner": return (False, None) - serial = util.read_dmi_data("system-serial-number") + serial = dmi.read_dmi_data("system-serial-number") if serial: LOG.debug("Running on Hetzner Cloud: serial=%s", serial) else: diff --git a/cloudinit/sources/DataSourceNoCloud.py b/cloudinit/sources/DataSourceNoCloud.py index d4a175e8..a126aad3 100644 --- a/cloudinit/sources/DataSourceNoCloud.py +++ b/cloudinit/sources/DataSourceNoCloud.py @@ -11,6 +11,7 @@ import errno import os +from cloudinit import dmi from cloudinit import log as logging from cloudinit.net import eni from cloudinit import sources @@ -61,7 +62,7 @@ class DataSourceNoCloud(sources.DataSource): # Parse the system serial label from dmi. 
If not empty, try parsing # like the commandline md = {} - serial = util.read_dmi_data('system-serial-number') + serial = dmi.read_dmi_data('system-serial-number') if serial and load_cmdline_data(md, serial): found.append("dmi") mydata = _merge_new_seed(mydata, {'meta-data': md}) diff --git a/cloudinit/sources/DataSourceOVF.py b/cloudinit/sources/DataSourceOVF.py index a5ccb8f6..741c140a 100644 --- a/cloudinit/sources/DataSourceOVF.py +++ b/cloudinit/sources/DataSourceOVF.py @@ -14,6 +14,7 @@ import re import time from xml.dom import minidom +from cloudinit import dmi from cloudinit import log as logging from cloudinit import sources from cloudinit import subp @@ -83,7 +84,7 @@ class DataSourceOVF(sources.DataSource): (seedfile, contents) = get_ovf_env(self.paths.seed_dir) - system_type = util.read_dmi_data("system-product-name") + system_type = dmi.read_dmi_data("system-product-name") if system_type is None: LOG.debug("No system-product-name found") @@ -322,7 +323,7 @@ class DataSourceOVF(sources.DataSource): return True def _get_subplatform(self): - system_type = util.read_dmi_data("system-product-name").lower() + system_type = dmi.read_dmi_data("system-product-name").lower() if system_type == 'vmware': return 'vmware (%s)' % self.seed return 'ovf (%s)' % self.seed diff --git a/cloudinit/sources/DataSourceOpenStack.py b/cloudinit/sources/DataSourceOpenStack.py index 0ede0a0e..b3406c67 100644 --- a/cloudinit/sources/DataSourceOpenStack.py +++ b/cloudinit/sources/DataSourceOpenStack.py @@ -6,6 +6,7 @@ import time +from cloudinit import dmi from cloudinit import log as logging from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError from cloudinit import sources @@ -225,10 +226,10 @@ def detect_openstack(accept_oracle=False): """Return True when a potential OpenStack platform is detected.""" if not util.is_x86(): return True # Non-Intel cpus don't properly report dmi product names - product_name = util.read_dmi_data('system-product-name') + product_name = 
dmi.read_dmi_data('system-product-name') if product_name in VALID_DMI_PRODUCT_NAMES: return True - elif util.read_dmi_data('chassis-asset-tag') in VALID_DMI_ASSET_TAGS: + elif dmi.read_dmi_data('chassis-asset-tag') in VALID_DMI_ASSET_TAGS: return True elif accept_oracle and oracle._is_platform_viable(): return True diff --git a/cloudinit/sources/DataSourceOracle.py b/cloudinit/sources/DataSourceOracle.py index 20d6487d..bf81b10b 100644 --- a/cloudinit/sources/DataSourceOracle.py +++ b/cloudinit/sources/DataSourceOracle.py @@ -17,6 +17,7 @@ import base64 from collections import namedtuple from contextlib import suppress as noop +from cloudinit import dmi from cloudinit import log as logging from cloudinit import net, sources, util from cloudinit.net import ( @@ -273,12 +274,12 @@ class DataSourceOracle(sources.DataSource): def _read_system_uuid(): - sys_uuid = util.read_dmi_data('system-uuid') + sys_uuid = dmi.read_dmi_data('system-uuid') return None if sys_uuid is None else sys_uuid.lower() def _is_platform_viable(): - asset_tag = util.read_dmi_data('chassis-asset-tag') + asset_tag = dmi.read_dmi_data('chassis-asset-tag') return asset_tag == CHASSIS_ASSET_TAG diff --git a/cloudinit/sources/DataSourceScaleway.py b/cloudinit/sources/DataSourceScaleway.py index 83c2bf65..41be7665 100644 --- a/cloudinit/sources/DataSourceScaleway.py +++ b/cloudinit/sources/DataSourceScaleway.py @@ -25,6 +25,7 @@ import requests from requests.packages.urllib3.connection import HTTPConnection from requests.packages.urllib3.poolmanager import PoolManager +from cloudinit import dmi from cloudinit import log as logging from cloudinit import sources from cloudinit import url_helper @@ -56,7 +57,7 @@ def on_scaleway(): * the initrd created the file /var/run/scaleway. * "scaleway" is in the kernel cmdline. 
""" - vendor_name = util.read_dmi_data('system-manufacturer') + vendor_name = dmi.read_dmi_data('system-manufacturer') if vendor_name == 'Scaleway': return True diff --git a/cloudinit/sources/DataSourceSmartOS.py b/cloudinit/sources/DataSourceSmartOS.py index f1f903bc..fd292baa 100644 --- a/cloudinit/sources/DataSourceSmartOS.py +++ b/cloudinit/sources/DataSourceSmartOS.py @@ -30,6 +30,7 @@ import random import re import socket +from cloudinit import dmi from cloudinit import log as logging from cloudinit import serial from cloudinit import sources @@ -767,7 +768,7 @@ def get_smartos_environ(uname_version=None, product_name=None): return SMARTOS_ENV_LX_BRAND if product_name is None: - system_type = util.read_dmi_data("system-product-name") + system_type = dmi.read_dmi_data("system-product-name") else: system_type = product_name diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index c4d60fff..9dccc687 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -14,6 +14,7 @@ import json import os from collections import namedtuple +from cloudinit import dmi from cloudinit import importer from cloudinit import log as logging from cloudinit import net @@ -809,7 +810,7 @@ def instance_id_matches_system_uuid(instance_id, field='system-uuid'): if not instance_id: return False - dmi_value = util.read_dmi_data(field) + dmi_value = dmi.read_dmi_data(field) if not dmi_value: return False return instance_id.lower() == dmi_value.lower() diff --git a/cloudinit/sources/helpers/digitalocean.py b/cloudinit/sources/helpers/digitalocean.py index b545c4d6..f9be4ecb 100644 --- a/cloudinit/sources/helpers/digitalocean.py +++ b/cloudinit/sources/helpers/digitalocean.py @@ -5,6 +5,7 @@ import json import random +from cloudinit import dmi from cloudinit import log as logging from cloudinit import net as cloudnet from cloudinit import url_helper @@ -195,11 +196,11 @@ def read_sysinfo(): # SMBIOS information # Detect if we are on DigitalOcean 
and return the Droplet's ID - vendor_name = util.read_dmi_data("system-manufacturer") + vendor_name = dmi.read_dmi_data("system-manufacturer") if vendor_name != "DigitalOcean": return (False, None) - droplet_id = util.read_dmi_data("system-serial-number") + droplet_id = dmi.read_dmi_data("system-serial-number") if droplet_id: LOG.debug("system identified via SMBIOS as DigitalOcean Droplet: %s", droplet_id) diff --git a/cloudinit/sources/tests/test_oracle.py b/cloudinit/sources/tests/test_oracle.py index 7bd23813..a7bbdfd9 100644 --- a/cloudinit/sources/tests/test_oracle.py +++ b/cloudinit/sources/tests/test_oracle.py @@ -153,20 +153,20 @@ class TestDataSourceOracle: class TestIsPlatformViable(test_helpers.CiTestCase): - @mock.patch(DS_PATH + ".util.read_dmi_data", + @mock.patch(DS_PATH + ".dmi.read_dmi_data", return_value=oracle.CHASSIS_ASSET_TAG) def test_expected_viable(self, m_read_dmi_data): """System with known chassis tag is viable.""" self.assertTrue(oracle._is_platform_viable()) m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')]) - @mock.patch(DS_PATH + ".util.read_dmi_data", return_value=None) + @mock.patch(DS_PATH + ".dmi.read_dmi_data", return_value=None) def test_expected_not_viable_dmi_data_none(self, m_read_dmi_data): """System without known chassis tag is not viable.""" self.assertFalse(oracle._is_platform_viable()) m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')]) - @mock.patch(DS_PATH + ".util.read_dmi_data", return_value="LetsGoCubs") + @mock.patch(DS_PATH + ".dmi.read_dmi_data", return_value="LetsGoCubs") def test_expected_not_viable_other(self, m_read_dmi_data): """System with unnown chassis tag is not viable.""" self.assertFalse(oracle._is_platform_viable()) diff --git a/cloudinit/tests/test_dmi.py b/cloudinit/tests/test_dmi.py new file mode 100644 index 00000000..4a8af257 --- /dev/null +++ b/cloudinit/tests/test_dmi.py @@ -0,0 +1,131 @@ +from cloudinit.tests import helpers +from cloudinit import dmi +from 
cloudinit import util +from cloudinit import subp + +import os +import tempfile +import shutil +from unittest import mock + + +class TestReadDMIData(helpers.FilesystemMockingTestCase): + + def setUp(self): + super(TestReadDMIData, self).setUp() + self.new_root = tempfile.mkdtemp() + self.addCleanup(shutil.rmtree, self.new_root) + self.reRoot(self.new_root) + p = mock.patch("cloudinit.dmi.is_container", return_value=False) + self.addCleanup(p.stop) + self._m_is_container = p.start() + + def _create_sysfs_parent_directory(self): + util.ensure_dir(os.path.join('sys', 'class', 'dmi', 'id')) + + def _create_sysfs_file(self, key, content): + """Mocks the sys path found on Linux systems.""" + self._create_sysfs_parent_directory() + dmi_key = "/sys/class/dmi/id/{0}".format(key) + util.write_file(dmi_key, content) + + def _configure_dmidecode_return(self, key, content, error=None): + """ + In order to test a missing sys path and call outs to dmidecode, this + function fakes the results of dmidecode to test the results. 
+ """ + def _dmidecode_subp(cmd): + if cmd[-1] != key: + raise subp.ProcessExecutionError() + return (content, error) + + self.patched_funcs.enter_context( + mock.patch("cloudinit.dmi.subp.which", side_effect=lambda _: True)) + self.patched_funcs.enter_context( + mock.patch("cloudinit.dmi.subp.subp", side_effect=_dmidecode_subp)) + + def patch_mapping(self, new_mapping): + self.patched_funcs.enter_context( + mock.patch('cloudinit.dmi.DMIDECODE_TO_DMI_SYS_MAPPING', + new_mapping)) + + def test_sysfs_used_with_key_in_mapping_and_file_on_disk(self): + self.patch_mapping({'mapped-key': 'mapped-value'}) + expected_dmi_value = 'sys-used-correctly' + self._create_sysfs_file('mapped-value', expected_dmi_value) + self._configure_dmidecode_return('mapped-key', 'wrong-wrong-wrong') + self.assertEqual(expected_dmi_value, dmi.read_dmi_data('mapped-key')) + + def test_dmidecode_used_if_no_sysfs_file_on_disk(self): + self.patch_mapping({}) + self._create_sysfs_parent_directory() + expected_dmi_value = 'dmidecode-used' + self._configure_dmidecode_return('use-dmidecode', expected_dmi_value) + with mock.patch("cloudinit.util.os.uname") as m_uname: + m_uname.return_value = ('x-sysname', 'x-nodename', + 'x-release', 'x-version', 'x86_64') + self.assertEqual(expected_dmi_value, + dmi.read_dmi_data('use-dmidecode')) + + def test_dmidecode_not_used_on_arm(self): + self.patch_mapping({}) + print("current =%s", subp) + self._create_sysfs_parent_directory() + dmi_val = 'from-dmidecode' + dmi_name = 'use-dmidecode' + self._configure_dmidecode_return(dmi_name, dmi_val) + print("now =%s", subp) + + expected = {'armel': None, 'aarch64': dmi_val, 'x86_64': dmi_val} + found = {} + # we do not run the 'dmi-decode' binary on some arches + # verify that anything requested that is not in the sysfs dir + # will return None on those arches. 
+ with mock.patch("cloudinit.util.os.uname") as m_uname: + for arch in expected: + m_uname.return_value = ('x-sysname', 'x-nodename', + 'x-release', 'x-version', arch) + print("now2 =%s", subp) + found[arch] = dmi.read_dmi_data(dmi_name) + self.assertEqual(expected, found) + + def test_none_returned_if_neither_source_has_data(self): + self.patch_mapping({}) + self._configure_dmidecode_return('key', 'value') + self.assertIsNone(dmi.read_dmi_data('expect-fail')) + + def test_none_returned_if_dmidecode_not_in_path(self): + self.patched_funcs.enter_context( + mock.patch.object(subp, 'which', lambda _: False)) + self.patch_mapping({}) + self.assertIsNone(dmi.read_dmi_data('expect-fail')) + + def test_empty_string_returned_instead_of_foxfox(self): + # uninitialized dmi values show as \xff, return empty string + my_len = 32 + dmi_value = b'\xff' * my_len + b'\n' + expected = "" + dmi_key = 'system-product-name' + sysfs_key = 'product_name' + self._create_sysfs_file(sysfs_key, dmi_value) + self.assertEqual(expected, dmi.read_dmi_data(dmi_key)) + + def test_container_returns_none(self): + """In a container read_dmi_data should always return None.""" + + # first verify we get the value if not in container + self._m_is_container.return_value = False + key, val = ("system-product-name", "my_product") + self._create_sysfs_file('product_name', val) + self.assertEqual(val, dmi.read_dmi_data(key)) + + # then verify in container returns None + self._m_is_container.return_value = True + self.assertIsNone(dmi.read_dmi_data(key)) + + def test_container_returns_none_on_unknown(self): + """In a container even bogus keys return None.""" + self._m_is_container.return_value = True + self._create_sysfs_file('product_name', "should-be-ignored") + self.assertIsNone(dmi.read_dmi_data("bogus")) + self.assertIsNone(dmi.read_dmi_data("system-product-name")) diff --git a/cloudinit/util.py b/cloudinit/util.py index b8856af1..bdb3694d 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -159,32 
+159,6 @@ def fully_decoded_payload(part): return cte_payload -# Path for DMI Data -DMI_SYS_PATH = "/sys/class/dmi/id" - -# dmidecode and /sys/class/dmi/id/* use different names for the same value, -# this allows us to refer to them by one canonical name -DMIDECODE_TO_DMI_SYS_MAPPING = { - 'baseboard-asset-tag': 'board_asset_tag', - 'baseboard-manufacturer': 'board_vendor', - 'baseboard-product-name': 'board_name', - 'baseboard-serial-number': 'board_serial', - 'baseboard-version': 'board_version', - 'bios-release-date': 'bios_date', - 'bios-vendor': 'bios_vendor', - 'bios-version': 'bios_version', - 'chassis-asset-tag': 'chassis_asset_tag', - 'chassis-manufacturer': 'chassis_vendor', - 'chassis-serial-number': 'chassis_serial', - 'chassis-version': 'chassis_version', - 'system-manufacturer': 'sys_vendor', - 'system-product-name': 'product_name', - 'system-serial-number': 'product_serial', - 'system-uuid': 'product_uuid', - 'system-version': 'product_version', -} - - class SeLinuxGuard(object): def __init__(self, path, recursive=False): # Late import since it might not always @@ -2421,57 +2395,6 @@ def human2bytes(size): return int(num * mpliers[mplier]) -def _read_dmi_syspath(key): - """ - Reads dmi data with from /sys/class/dmi/id - """ - if key not in DMIDECODE_TO_DMI_SYS_MAPPING: - return None - mapped_key = DMIDECODE_TO_DMI_SYS_MAPPING[key] - dmi_key_path = "{0}/{1}".format(DMI_SYS_PATH, mapped_key) - LOG.debug("querying dmi data %s", dmi_key_path) - try: - if not os.path.exists(dmi_key_path): - LOG.debug("did not find %s", dmi_key_path) - return None - - key_data = load_file(dmi_key_path, decode=False) - if not key_data: - LOG.debug("%s did not return any data", dmi_key_path) - return None - - # uninitialized dmi values show as all \xff and /sys appends a '\n'. - # in that event, return a string of '.' in the same length. 
- if key_data == b'\xff' * (len(key_data) - 1) + b'\n': - key_data = b"" - - str_data = key_data.decode('utf8').strip() - LOG.debug("dmi data %s returned %s", dmi_key_path, str_data) - return str_data - - except Exception: - logexc(LOG, "failed read of %s", dmi_key_path) - return None - - -def _call_dmidecode(key, dmidecode_path): - """ - Calls out to dmidecode to get the data out. This is mostly for supporting - OS's without /sys/class/dmi/id support. - """ - try: - cmd = [dmidecode_path, "--string", key] - (result, _err) = subp.subp(cmd) - result = result.strip() - LOG.debug("dmidecode returned '%s' for '%s'", result, key) - if result.replace(".", "") == "": - return "" - return result - except (IOError, OSError) as e: - LOG.debug('failed dmidecode cmd: %s\n%s', cmd, e) - return None - - def is_x86(uname_arch=None): """Return True if platform is x86-based""" if uname_arch is None: @@ -2482,48 +2405,6 @@ def is_x86(uname_arch=None): return x86_arch_match -def read_dmi_data(key): - """ - Wrapper for reading DMI data. - - If running in a container return None. This is because DMI data is - assumed to be not useful in a container as it does not represent the - container but rather the host. - - This will do the following (returning the first that produces a - result): - 1) Use a mapping to translate `key` from dmidecode naming to - sysfs naming and look in /sys/class/dmi/... for a value. - 2) Use `key` as a sysfs key directly and look in /sys/class/dmi/... - 3) Fall-back to passing `key` to `dmidecode --string`. - - If all of the above fail to find a value, None will be returned. 
- """ - - if is_container(): - return None - - syspath_value = _read_dmi_syspath(key) - if syspath_value is not None: - return syspath_value - - # running dmidecode can be problematic on some arches (LP: #1243287) - uname_arch = os.uname()[4] - if not (is_x86(uname_arch) or - uname_arch == 'aarch64' or - uname_arch == 'amd64'): - LOG.debug("dmidata is not supported on %s", uname_arch) - return None - - dmidecode_path = subp.which('dmidecode') - if dmidecode_path: - return _call_dmidecode(key, dmidecode_path) - - LOG.warning("did not find either path %s or dmidecode command", - DMI_SYS_PATH) - return None - - def message_from_string(string): if sys.version_info[:2] < (2, 7): return email.message_from_file(io.StringIO(string)) diff --git a/tests/unittests/test_datasource/test_aliyun.py b/tests/unittests/test_datasource/test_aliyun.py index b626229e..eb2828d5 100644 --- a/tests/unittests/test_datasource/test_aliyun.py +++ b/tests/unittests/test_datasource/test_aliyun.py @@ -188,7 +188,7 @@ class TestIsAliYun(test_helpers.CiTestCase): ALIYUN_PRODUCT = 'Alibaba Cloud ECS' read_dmi_data_expected = [mock.call('system-product-name')] - @mock.patch("cloudinit.sources.DataSourceAliYun.util.read_dmi_data") + @mock.patch("cloudinit.sources.DataSourceAliYun.dmi.read_dmi_data") def test_true_on_aliyun_product(self, m_read_dmi_data): """Should return true if the dmi product data has expected value.""" m_read_dmi_data.return_value = self.ALIYUN_PRODUCT @@ -197,7 +197,7 @@ class TestIsAliYun(test_helpers.CiTestCase): m_read_dmi_data.call_args_list) self.assertEqual(True, ret) - @mock.patch("cloudinit.sources.DataSourceAliYun.util.read_dmi_data") + @mock.patch("cloudinit.sources.DataSourceAliYun.dmi.read_dmi_data") def test_false_on_empty_string(self, m_read_dmi_data): """Should return false on empty value returned.""" m_read_dmi_data.return_value = "" @@ -206,7 +206,7 @@ class TestIsAliYun(test_helpers.CiTestCase): m_read_dmi_data.call_args_list) self.assertEqual(False, ret) - 
@mock.patch("cloudinit.sources.DataSourceAliYun.util.read_dmi_data") + @mock.patch("cloudinit.sources.DataSourceAliYun.dmi.read_dmi_data") def test_false_on_unknown_string(self, m_read_dmi_data): """Should return false on an unrelated string.""" m_read_dmi_data.return_value = "cubs win" diff --git a/tests/unittests/test_datasource/test_altcloud.py b/tests/unittests/test_datasource/test_altcloud.py index fc59d1d5..7a5393ac 100644 --- a/tests/unittests/test_datasource/test_altcloud.py +++ b/tests/unittests/test_datasource/test_altcloud.py @@ -14,6 +14,7 @@ import os import shutil import tempfile +from cloudinit import dmi from cloudinit import helpers from cloudinit import subp from cloudinit import util @@ -88,14 +89,14 @@ class TestGetCloudType(CiTestCase): super(TestGetCloudType, self).setUp() self.tmp = self.tmp_dir() self.paths = helpers.Paths({'cloud_dir': self.tmp}) - self.dmi_data = util.read_dmi_data + self.dmi_data = dmi.read_dmi_data # We have a different code path for arm to deal with LP1243287 # We have to switch arch to x86_64 to avoid test failure force_arch('x86_64') def tearDown(self): # Reset - util.read_dmi_data = self.dmi_data + dmi.read_dmi_data = self.dmi_data force_arch() def test_cloud_info_file_ioerror(self): @@ -123,7 +124,7 @@ class TestGetCloudType(CiTestCase): Test method get_cloud_type() for RHEVm systems. Forcing read_dmi_data return to match a RHEVm system: RHEV Hypervisor ''' - util.read_dmi_data = _dmi_data('RHEV') + dmi.read_dmi_data = _dmi_data('RHEV') dsrc = dsac.DataSourceAltCloud({}, None, self.paths) self.assertEqual('RHEV', dsrc.get_cloud_type()) @@ -132,7 +133,7 @@ class TestGetCloudType(CiTestCase): Test method get_cloud_type() for vSphere systems. 
Forcing read_dmi_data return to match a vSphere system: RHEV Hypervisor ''' - util.read_dmi_data = _dmi_data('VMware Virtual Platform') + dmi.read_dmi_data = _dmi_data('VMware Virtual Platform') dsrc = dsac.DataSourceAltCloud({}, None, self.paths) self.assertEqual('VSPHERE', dsrc.get_cloud_type()) @@ -141,7 +142,7 @@ class TestGetCloudType(CiTestCase): Test method get_cloud_type() for unknown systems. Forcing read_dmi_data return to match an unrecognized return. ''' - util.read_dmi_data = _dmi_data('Unrecognized Platform') + dmi.read_dmi_data = _dmi_data('Unrecognized Platform') dsrc = dsac.DataSourceAltCloud({}, None, self.paths) self.assertEqual('UNKNOWN', dsrc.get_cloud_type()) @@ -219,7 +220,7 @@ class TestGetDataNoCloudInfoFile(CiTestCase): self.tmp = self.tmp_dir() self.paths = helpers.Paths( {'cloud_dir': self.tmp, 'run_dir': self.tmp}) - self.dmi_data = util.read_dmi_data + self.dmi_data = dmi.read_dmi_data dsac.CLOUD_INFO_FILE = \ 'no such file' # We have a different code path for arm to deal with LP1243287 @@ -230,14 +231,14 @@ class TestGetDataNoCloudInfoFile(CiTestCase): # Reset dsac.CLOUD_INFO_FILE = \ '/etc/sysconfig/cloud-info' - util.read_dmi_data = self.dmi_data + dmi.read_dmi_data = self.dmi_data # Return back to original arch force_arch() def test_rhev_no_cloud_file(self): '''Test No cloud info file module get_data() forcing RHEV.''' - util.read_dmi_data = _dmi_data('RHEV Hypervisor') + dmi.read_dmi_data = _dmi_data('RHEV Hypervisor') dsrc = dsac.DataSourceAltCloud({}, None, self.paths) dsrc.user_data_rhevm = lambda: True self.assertEqual(True, dsrc.get_data()) @@ -245,7 +246,7 @@ class TestGetDataNoCloudInfoFile(CiTestCase): def test_vsphere_no_cloud_file(self): '''Test No cloud info file module get_data() forcing VSPHERE.''' - util.read_dmi_data = _dmi_data('VMware Virtual Platform') + dmi.read_dmi_data = _dmi_data('VMware Virtual Platform') dsrc = dsac.DataSourceAltCloud({}, None, self.paths) dsrc.user_data_vsphere = lambda: True 
self.assertEqual(True, dsrc.get_data()) @@ -253,7 +254,7 @@ class TestGetDataNoCloudInfoFile(CiTestCase): def test_failure_no_cloud_file(self): '''Test No cloud info file module get_data() forcing unrecognized.''' - util.read_dmi_data = _dmi_data('Unrecognized Platform') + dmi.read_dmi_data = _dmi_data('Unrecognized Platform') dsrc = dsac.DataSourceAltCloud({}, None, self.paths) self.assertEqual(False, dsrc.get_data()) diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 56c1cf18..433fbc66 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -570,7 +570,7 @@ scbus-1 on xpt0 bus 0 (dsaz, 'set_hostname', mock.MagicMock()), (dsaz, 'get_metadata_from_fabric', self.get_metadata_from_fabric), (dsaz.subp, 'which', lambda x: True), - (dsaz.util, 'read_dmi_data', mock.MagicMock( + (dsaz.dmi, 'read_dmi_data', mock.MagicMock( side_effect=_dmi_mocks)), (dsaz.util, 'wait_for_files', mock.MagicMock( side_effect=_wait_for_files)), @@ -1427,7 +1427,7 @@ class TestAzureBounce(CiTestCase): raise RuntimeError('should not get here') self.patches.enter_context( - mock.patch.object(dsaz.util, 'read_dmi_data', + mock.patch.object(dsaz.dmi, 'read_dmi_data', mock.MagicMock(side_effect=_dmi_mocks))) def setUp(self): @@ -2294,14 +2294,14 @@ class TestWBIsPlatformViable(CiTestCase): """White box tests for _is_platform_viable.""" with_logs = True - @mock.patch(MOCKPATH + 'util.read_dmi_data') + @mock.patch(MOCKPATH + 'dmi.read_dmi_data') def test_true_on_non_azure_chassis(self, m_read_dmi_data): """Return True if DMI chassis-asset-tag is AZURE_CHASSIS_ASSET_TAG.""" m_read_dmi_data.return_value = dsaz.AZURE_CHASSIS_ASSET_TAG self.assertTrue(dsaz._is_platform_viable('doesnotmatter')) @mock.patch(MOCKPATH + 'os.path.exists') - @mock.patch(MOCKPATH + 'util.read_dmi_data') + @mock.patch(MOCKPATH + 'dmi.read_dmi_data') def test_true_on_azure_ovf_env_in_seed_dir(self, 
m_read_dmi_data, m_exist): """Return True if ovf-env.xml exists in known seed dirs.""" # Non-matching Azure chassis-asset-tag @@ -2322,7 +2322,7 @@ class TestWBIsPlatformViable(CiTestCase): MOCKPATH, {'os.path.exists': False, # Non-matching Azure chassis-asset-tag - 'util.read_dmi_data': dsaz.AZURE_CHASSIS_ASSET_TAG + 'X', + 'dmi.read_dmi_data': dsaz.AZURE_CHASSIS_ASSET_TAG + 'X', 'subp.which': None}, dsaz._is_platform_viable, 'doesnotmatter')) self.assertIn( diff --git a/tests/unittests/test_datasource/test_nocloud.py b/tests/unittests/test_datasource/test_nocloud.py index 2e6b53ff..02cc9b38 100644 --- a/tests/unittests/test_datasource/test_nocloud.py +++ b/tests/unittests/test_datasource/test_nocloud.py @@ -1,5 +1,6 @@ # This file is part of cloud-init. See LICENSE file for license information. +from cloudinit import dmi from cloudinit import helpers from cloudinit.sources.DataSourceNoCloud import ( DataSourceNoCloud as dsNoCloud, @@ -30,7 +31,7 @@ class TestNoCloudDataSource(CiTestCase): self.mocks.enter_context( mock.patch.object(util, 'get_cmdline', return_value=self.cmdline)) self.mocks.enter_context( - mock.patch.object(util, 'read_dmi_data', return_value=None)) + mock.patch.object(dmi, 'read_dmi_data', return_value=None)) def _test_fs_config_is_read(self, fs_label, fs_label_to_search): vfat_device = 'device-1' diff --git a/tests/unittests/test_datasource/test_openstack.py b/tests/unittests/test_datasource/test_openstack.py index 9b0c1b8a..415755aa 100644 --- a/tests/unittests/test_datasource/test_openstack.py +++ b/tests/unittests/test_datasource/test_openstack.py @@ -459,7 +459,7 @@ class TestDetectOpenStack(test_helpers.CiTestCase): ds.detect_openstack(), 'Expected detect_openstack == True') @test_helpers.mock.patch(MOCK_PATH + 'util.get_proc_env') - @test_helpers.mock.patch(MOCK_PATH + 'util.read_dmi_data') + @test_helpers.mock.patch(MOCK_PATH + 'dmi.read_dmi_data') def test_not_detect_openstack_intel_x86_ec2(self, m_dmi, m_proc_env, m_is_x86): """Return 
False on EC2 platforms.""" @@ -479,7 +479,7 @@ class TestDetectOpenStack(test_helpers.CiTestCase): ds.detect_openstack(), 'Expected detect_openstack == False on EC2') m_proc_env.assert_called_with(1) - @test_helpers.mock.patch(MOCK_PATH + 'util.read_dmi_data') + @test_helpers.mock.patch(MOCK_PATH + 'dmi.read_dmi_data') def test_detect_openstack_intel_product_name_compute(self, m_dmi, m_is_x86): """Return True on OpenStack compute and nova instances.""" @@ -491,7 +491,7 @@ class TestDetectOpenStack(test_helpers.CiTestCase): self.assertTrue( ds.detect_openstack(), 'Failed to detect_openstack') - @test_helpers.mock.patch(MOCK_PATH + 'util.read_dmi_data') + @test_helpers.mock.patch(MOCK_PATH + 'dmi.read_dmi_data') def test_detect_openstack_opentelekomcloud_chassis_asset_tag(self, m_dmi, m_is_x86): """Return True on OpenStack reporting OpenTelekomCloud asset-tag.""" @@ -509,7 +509,7 @@ class TestDetectOpenStack(test_helpers.CiTestCase): ds.detect_openstack(), 'Expected detect_openstack == True on OpenTelekomCloud') - @test_helpers.mock.patch(MOCK_PATH + 'util.read_dmi_data') + @test_helpers.mock.patch(MOCK_PATH + 'dmi.read_dmi_data') def test_detect_openstack_sapccloud_chassis_asset_tag(self, m_dmi, m_is_x86): """Return True on OpenStack reporting SAP CCloud VM asset-tag.""" @@ -527,7 +527,7 @@ class TestDetectOpenStack(test_helpers.CiTestCase): ds.detect_openstack(), 'Expected detect_openstack == True on SAP CCloud VM') - @test_helpers.mock.patch(MOCK_PATH + 'util.read_dmi_data') + @test_helpers.mock.patch(MOCK_PATH + 'dmi.read_dmi_data') def test_detect_openstack_oraclecloud_chassis_asset_tag(self, m_dmi, m_is_x86): """Return True on OpenStack reporting Oracle cloud asset-tag.""" @@ -566,20 +566,20 @@ class TestDetectOpenStack(test_helpers.CiTestCase): ds.detect_openstack(), 'Expected detect_openstack == True on Generic OpenStack Platform') - @test_helpers.mock.patch(MOCK_PATH + 'util.read_dmi_data') + @test_helpers.mock.patch(MOCK_PATH + 'dmi.read_dmi_data') def 
test_detect_openstack_nova_chassis_asset_tag(self, m_dmi, m_is_x86): self._test_detect_openstack_nova_compute_chassis_asset_tag( m_dmi, m_is_x86, 'OpenStack Nova') - @test_helpers.mock.patch(MOCK_PATH + 'util.read_dmi_data') + @test_helpers.mock.patch(MOCK_PATH + 'dmi.read_dmi_data') def test_detect_openstack_compute_chassis_asset_tag(self, m_dmi, m_is_x86): self._test_detect_openstack_nova_compute_chassis_asset_tag( m_dmi, m_is_x86, 'OpenStack Compute') @test_helpers.mock.patch(MOCK_PATH + 'util.get_proc_env') - @test_helpers.mock.patch(MOCK_PATH + 'util.read_dmi_data') + @test_helpers.mock.patch(MOCK_PATH + 'dmi.read_dmi_data') def test_detect_openstack_by_proc_1_environ(self, m_dmi, m_proc_env, m_is_x86): """Return True when nova product_name specified in /proc/1/environ.""" diff --git a/tests/unittests/test_datasource/test_ovf.py b/tests/unittests/test_datasource/test_ovf.py index 1d088577..16773de5 100644 --- a/tests/unittests/test_datasource/test_ovf.py +++ b/tests/unittests/test_datasource/test_ovf.py @@ -129,7 +129,7 @@ class TestDatasourceOVF(CiTestCase): ds = self.datasource(sys_cfg={}, distro={}, paths=paths) retcode = wrap_and_call( 'cloudinit.sources.DataSourceOVF', - {'util.read_dmi_data': None, + {'dmi.read_dmi_data': None, 'transport_iso9660': NOT_FOUND, 'transport_vmware_guestinfo': NOT_FOUND}, ds.get_data) @@ -145,7 +145,7 @@ class TestDatasourceOVF(CiTestCase): paths=paths) retcode = wrap_and_call( 'cloudinit.sources.DataSourceOVF', - {'util.read_dmi_data': 'vmware', + {'dmi.read_dmi_data': 'vmware', 'transport_iso9660': NOT_FOUND, 'transport_vmware_guestinfo': NOT_FOUND}, ds.get_data) @@ -174,7 +174,7 @@ class TestDatasourceOVF(CiTestCase): with self.assertRaises(CustomScriptNotFound) as context: wrap_and_call( 'cloudinit.sources.DataSourceOVF', - {'util.read_dmi_data': 'vmware', + {'dmi.read_dmi_data': 'vmware', 'util.del_dir': True, 'search_file': self.tdir, 'wait_for_imc_cfg_file': conf_file, @@ -211,7 +211,7 @@ class 
TestDatasourceOVF(CiTestCase): with self.assertRaises(RuntimeError) as context: wrap_and_call( 'cloudinit.sources.DataSourceOVF', - {'util.read_dmi_data': 'vmware', + {'dmi.read_dmi_data': 'vmware', 'util.del_dir': True, 'search_file': self.tdir, 'wait_for_imc_cfg_file': conf_file, @@ -246,7 +246,7 @@ class TestDatasourceOVF(CiTestCase): with self.assertRaises(CustomScriptNotFound) as context: wrap_and_call( 'cloudinit.sources.DataSourceOVF', - {'util.read_dmi_data': 'vmware', + {'dmi.read_dmi_data': 'vmware', 'util.del_dir': True, 'search_file': self.tdir, 'wait_for_imc_cfg_file': conf_file, @@ -290,7 +290,7 @@ class TestDatasourceOVF(CiTestCase): with self.assertRaises(CustomScriptNotFound) as context: wrap_and_call( 'cloudinit.sources.DataSourceOVF', - {'util.read_dmi_data': 'vmware', + {'dmi.read_dmi_data': 'vmware', 'util.del_dir': True, 'search_file': self.tdir, 'wait_for_imc_cfg_file': conf_file, @@ -313,7 +313,7 @@ class TestDatasourceOVF(CiTestCase): self.assertEqual('ovf', ds.cloud_name) self.assertEqual('ovf', ds.platform_type) - with mock.patch(MPATH + 'util.read_dmi_data', return_value='!VMware'): + with mock.patch(MPATH + 'dmi.read_dmi_data', return_value='!VMware'): with mock.patch(MPATH + 'transport_vmware_guestinfo') as m_guestd: with mock.patch(MPATH + 'transport_iso9660') as m_iso9660: m_iso9660.return_value = NOT_FOUND @@ -334,7 +334,7 @@ class TestDatasourceOVF(CiTestCase): self.assertEqual('ovf', ds.cloud_name) self.assertEqual('ovf', ds.platform_type) - with mock.patch(MPATH + 'util.read_dmi_data', return_value='VMWare'): + with mock.patch(MPATH + 'dmi.read_dmi_data', return_value='VMWare'): with mock.patch(MPATH + 'transport_vmware_guestinfo') as m_guestd: with mock.patch(MPATH + 'transport_iso9660') as m_iso9660: m_iso9660.return_value = NOT_FOUND diff --git a/tests/unittests/test_datasource/test_scaleway.py b/tests/unittests/test_datasource/test_scaleway.py index 9d82bda9..32f3274a 100644 --- 
a/tests/unittests/test_datasource/test_scaleway.py +++ b/tests/unittests/test_datasource/test_scaleway.py @@ -87,7 +87,7 @@ class TestOnScaleway(CiTestCase): @mock.patch('cloudinit.util.get_cmdline') @mock.patch('os.path.exists') - @mock.patch('cloudinit.util.read_dmi_data') + @mock.patch('cloudinit.dmi.read_dmi_data') def test_not_on_scaleway(self, m_read_dmi_data, m_file_exists, m_get_cmdline): self.install_mocks( @@ -105,7 +105,7 @@ class TestOnScaleway(CiTestCase): @mock.patch('cloudinit.util.get_cmdline') @mock.patch('os.path.exists') - @mock.patch('cloudinit.util.read_dmi_data') + @mock.patch('cloudinit.dmi.read_dmi_data') def test_on_scaleway_dmi(self, m_read_dmi_data, m_file_exists, m_get_cmdline): """ @@ -121,7 +121,7 @@ class TestOnScaleway(CiTestCase): @mock.patch('cloudinit.util.get_cmdline') @mock.patch('os.path.exists') - @mock.patch('cloudinit.util.read_dmi_data') + @mock.patch('cloudinit.dmi.read_dmi_data') def test_on_scaleway_var_run_scaleway(self, m_read_dmi_data, m_file_exists, m_get_cmdline): """ @@ -136,7 +136,7 @@ class TestOnScaleway(CiTestCase): @mock.patch('cloudinit.util.get_cmdline') @mock.patch('os.path.exists') - @mock.patch('cloudinit.util.read_dmi_data') + @mock.patch('cloudinit.dmi.read_dmi_data') def test_on_scaleway_cmdline(self, m_read_dmi_data, m_file_exists, m_get_cmdline): """ diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py index cca53123..857629f1 100644 --- a/tests/unittests/test_util.py +++ b/tests/unittests/test_util.py @@ -492,129 +492,6 @@ class TestIsX86(helpers.CiTestCase): self.assertTrue(util.is_x86()) -class TestReadDMIData(helpers.FilesystemMockingTestCase): - - def setUp(self): - super(TestReadDMIData, self).setUp() - self.new_root = tempfile.mkdtemp() - self.addCleanup(shutil.rmtree, self.new_root) - self.patchOS(self.new_root) - self.patchUtils(self.new_root) - p = mock.patch("cloudinit.util.is_container", return_value=False) - self.addCleanup(p.stop) - self._m_is_container = p.start() - 
- def _create_sysfs_parent_directory(self): - util.ensure_dir(os.path.join('sys', 'class', 'dmi', 'id')) - - def _create_sysfs_file(self, key, content): - """Mocks the sys path found on Linux systems.""" - self._create_sysfs_parent_directory() - dmi_key = "/sys/class/dmi/id/{0}".format(key) - util.write_file(dmi_key, content) - - def _configure_dmidecode_return(self, key, content, error=None): - """ - In order to test a missing sys path and call outs to dmidecode, this - function fakes the results of dmidecode to test the results. - """ - def _dmidecode_subp(cmd): - if cmd[-1] != key: - raise subp.ProcessExecutionError() - return (content, error) - - self.patched_funcs.enter_context( - mock.patch("cloudinit.subp.which", side_effect=lambda _: True)) - self.patched_funcs.enter_context( - mock.patch("cloudinit.subp.subp", side_effect=_dmidecode_subp)) - - def patch_mapping(self, new_mapping): - self.patched_funcs.enter_context( - mock.patch('cloudinit.util.DMIDECODE_TO_DMI_SYS_MAPPING', - new_mapping)) - - def test_sysfs_used_with_key_in_mapping_and_file_on_disk(self): - self.patch_mapping({'mapped-key': 'mapped-value'}) - expected_dmi_value = 'sys-used-correctly' - self._create_sysfs_file('mapped-value', expected_dmi_value) - self._configure_dmidecode_return('mapped-key', 'wrong-wrong-wrong') - self.assertEqual(expected_dmi_value, util.read_dmi_data('mapped-key')) - - def test_dmidecode_used_if_no_sysfs_file_on_disk(self): - self.patch_mapping({}) - self._create_sysfs_parent_directory() - expected_dmi_value = 'dmidecode-used' - self._configure_dmidecode_return('use-dmidecode', expected_dmi_value) - with mock.patch("cloudinit.util.os.uname") as m_uname: - m_uname.return_value = ('x-sysname', 'x-nodename', - 'x-release', 'x-version', 'x86_64') - self.assertEqual(expected_dmi_value, - util.read_dmi_data('use-dmidecode')) - - def test_dmidecode_not_used_on_arm(self): - self.patch_mapping({}) - print("current =%s", subp) - self._create_sysfs_parent_directory() - dmi_val = 
'from-dmidecode' - dmi_name = 'use-dmidecode' - self._configure_dmidecode_return(dmi_name, dmi_val) - print("now =%s", subp) - - expected = {'armel': None, 'aarch64': dmi_val, 'x86_64': dmi_val} - found = {} - # we do not run the 'dmi-decode' binary on some arches - # verify that anything requested that is not in the sysfs dir - # will return None on those arches. - with mock.patch("cloudinit.util.os.uname") as m_uname: - for arch in expected: - m_uname.return_value = ('x-sysname', 'x-nodename', - 'x-release', 'x-version', arch) - print("now2 =%s", subp) - found[arch] = util.read_dmi_data(dmi_name) - self.assertEqual(expected, found) - - def test_none_returned_if_neither_source_has_data(self): - self.patch_mapping({}) - self._configure_dmidecode_return('key', 'value') - self.assertIsNone(util.read_dmi_data('expect-fail')) - - def test_none_returned_if_dmidecode_not_in_path(self): - self.patched_funcs.enter_context( - mock.patch.object(subp, 'which', lambda _: False)) - self.patch_mapping({}) - self.assertIsNone(util.read_dmi_data('expect-fail')) - - def test_dots_returned_instead_of_foxfox(self): - # uninitialized dmi values show as \xff, return those as . 
- my_len = 32 - dmi_value = b'\xff' * my_len + b'\n' - expected = "" - dmi_key = 'system-product-name' - sysfs_key = 'product_name' - self._create_sysfs_file(sysfs_key, dmi_value) - self.assertEqual(expected, util.read_dmi_data(dmi_key)) - - def test_container_returns_none(self): - """In a container read_dmi_data should always return None.""" - - # first verify we get the value if not in container - self._m_is_container.return_value = False - key, val = ("system-product-name", "my_product") - self._create_sysfs_file('product_name', val) - self.assertEqual(val, util.read_dmi_data(key)) - - # then verify in container returns None - self._m_is_container.return_value = True - self.assertIsNone(util.read_dmi_data(key)) - - def test_container_returns_none_on_unknown(self): - """In a container even bogus keys return None.""" - self._m_is_container.return_value = True - self._create_sysfs_file('product_name', "should-be-ignored") - self.assertIsNone(util.read_dmi_data("bogus")) - self.assertIsNone(util.read_dmi_data("system-product-name")) - - class TestGetConfigLogfiles(helpers.CiTestCase): def test_empty_cfg_returns_empty_list(self): -- cgit v1.2.3 From d807df288f8cef29ca74f0b00c326b084e825782 Mon Sep 17 00:00:00 2001 From: Johnson Shi Date: Wed, 18 Nov 2020 09:34:04 -0800 Subject: DataSourceAzure: send failure signal on Azure datasource failure (#594) On systems where the Azure datasource is a viable platform for crawling metadata, cloud-init occasionally encounters fatal irrecoverable errors during the crawling of the Azure datasource. When this happens, cloud-init crashes, and Azure VM provisioning would fail. However, instead of failing immediately, the user will continue seeing provisioning for a long time until it times out with "OS Provisioning Timed Out" message. In these situations, cloud-init should report failure to the Azure datasource endpoint indicating provisioning failure. 
The user will immediately see provisioning terminate, giving them a much better failure experience instead of pointlessly waiting for OS provisioning timeout. --- cloudinit/sources/DataSourceAzure.py | 73 ++- cloudinit/sources/helpers/azure.py | 80 ++- tests/unittests/test_datasource/test_azure.py | 322 ++++++++++-- .../unittests/test_datasource/test_azure_helper.py | 569 ++++++++++++++++++--- 4 files changed, 921 insertions(+), 123 deletions(-) (limited to 'tests/unittests/test_datasource/test_azure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index fa3e0a2b..ab139b8d 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -29,6 +29,7 @@ from cloudinit import util from cloudinit.reporting import events from cloudinit.sources.helpers.azure import ( + DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE, azure_ds_reporter, azure_ds_telemetry_reporter, get_metadata_from_fabric, @@ -38,7 +39,8 @@ from cloudinit.sources.helpers.azure import ( EphemeralDHCPv4WithReporting, is_byte_swapped, dhcp_log_cb, - push_log_to_kvp) + push_log_to_kvp, + report_failure_to_fabric) LOG = logging.getLogger(__name__) @@ -508,8 +510,9 @@ class DataSourceAzure(sources.DataSource): if perform_reprovision: LOG.info("Reporting ready to Azure after getting ReprovisionData") - use_cached_ephemeral = (net.is_up(self.fallback_interface) and - getattr(self, '_ephemeral_dhcp_ctx', None)) + use_cached_ephemeral = ( + self.distro.networking.is_up(self.fallback_interface) and + getattr(self, '_ephemeral_dhcp_ctx', None)) if use_cached_ephemeral: self._report_ready(lease=self._ephemeral_dhcp_ctx.lease) self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral @@ -560,9 +563,14 @@ class DataSourceAzure(sources.DataSource): logfunc=LOG.debug, msg='Crawl of metadata service', func=self.crawl_metadata ) - except sources.InvalidMetaDataException as e: - LOG.warning('Could not crawl Azure metadata: %s', e) + except Exception 
as e: + report_diagnostic_event( + 'Could not crawl Azure metadata: %s' % e, + logger_func=LOG.error) + self._report_failure( + description=DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE) return False + if (self.distro and self.distro.name == 'ubuntu' and self.ds_cfg.get('apply_network_config')): maybe_remove_ubuntu_network_config_scripts() @@ -785,6 +793,61 @@ class DataSourceAzure(sources.DataSource): return return_val @azure_ds_telemetry_reporter + def _report_failure(self, description=None) -> bool: + """Tells the Azure fabric that provisioning has failed. + + @param description: A description of the error encountered. + @return: The success status of sending the failure signal. + """ + unknown_245_key = 'unknown-245' + + try: + if (self.distro.networking.is_up(self.fallback_interface) and + getattr(self, '_ephemeral_dhcp_ctx', None) and + getattr(self._ephemeral_dhcp_ctx, 'lease', None) and + unknown_245_key in self._ephemeral_dhcp_ctx.lease): + report_diagnostic_event( + 'Using cached ephemeral dhcp context ' + 'to report failure to Azure', logger_func=LOG.debug) + report_failure_to_fabric( + dhcp_opts=self._ephemeral_dhcp_ctx.lease[unknown_245_key], + description=description) + self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral + return True + except Exception as e: + report_diagnostic_event( + 'Failed to report failure using ' + 'cached ephemeral dhcp context: %s' % e, + logger_func=LOG.error) + + try: + report_diagnostic_event( + 'Using new ephemeral dhcp to report failure to Azure', + logger_func=LOG.debug) + with EphemeralDHCPv4WithReporting(azure_ds_reporter) as lease: + report_failure_to_fabric( + dhcp_opts=lease[unknown_245_key], + description=description) + return True + except Exception as e: + report_diagnostic_event( + 'Failed to report failure using new ephemeral dhcp: %s' % e, + logger_func=LOG.debug) + + try: + report_diagnostic_event( + 'Using fallback lease to report failure to Azure') + report_failure_to_fabric( + 
fallback_lease_file=self.dhclient_lease_file, + description=description) + return True + except Exception as e: + report_diagnostic_event( + 'Failed to report failure using fallback lease: %s' % e, + logger_func=LOG.debug) + + return False + def _report_ready(self, lease: dict) -> bool: """Tells the fabric provisioning has completed. diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index 4071a50e..951c7a10 100755 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -17,6 +17,7 @@ from cloudinit import stages from cloudinit import temp_utils from contextlib import contextmanager from xml.etree import ElementTree +from xml.sax.saxutils import escape from cloudinit import subp from cloudinit import url_helper @@ -50,6 +51,11 @@ azure_ds_reporter = events.ReportEventStack( description="initialize reporter for azure ds", reporting_enabled=True) +DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE = ( + 'The VM encountered an error during deployment. 
' + 'Please visit https://aka.ms/linuxprovisioningerror ' + 'for more information on remediation.') + def azure_ds_telemetry_reporter(func): def impl(*args, **kwargs): @@ -379,12 +385,20 @@ class OpenSSLManager: def __init__(self): self.tmpdir = temp_utils.mkdtemp() - self.certificate = None + self._certificate = None self.generate_certificate() def clean_up(self): util.del_dir(self.tmpdir) + @property + def certificate(self): + return self._certificate + + @certificate.setter + def certificate(self, value): + self._certificate = value + @azure_ds_telemetry_reporter def generate_certificate(self): LOG.debug('Generating certificate for communication with fabric...') @@ -507,6 +521,10 @@ class GoalStateHealthReporter: ''') PROVISIONING_SUCCESS_STATUS = 'Ready' + PROVISIONING_NOT_READY_STATUS = 'NotReady' + PROVISIONING_FAILURE_SUBSTATUS = 'ProvisioningFailed' + + HEALTH_REPORT_DESCRIPTION_TRIM_LEN = 512 def __init__( self, goal_state: GoalState, @@ -545,19 +563,39 @@ class GoalStateHealthReporter: LOG.info('Reported ready to Azure fabric.') + @azure_ds_telemetry_reporter + def send_failure_signal(self, description: str) -> None: + document = self.build_report( + incarnation=self._goal_state.incarnation, + container_id=self._goal_state.container_id, + instance_id=self._goal_state.instance_id, + status=self.PROVISIONING_NOT_READY_STATUS, + substatus=self.PROVISIONING_FAILURE_SUBSTATUS, + description=description) + try: + self._post_health_report(document=document) + except Exception as e: + msg = "exception while reporting failure: %s" % e + report_diagnostic_event(msg, logger_func=LOG.error) + raise + + LOG.warning('Reported failure to Azure fabric.') + def build_report( self, incarnation: str, container_id: str, instance_id: str, status: str, substatus=None, description=None) -> str: health_detail = '' if substatus is not None: health_detail = self.HEALTH_DETAIL_SUBSECTION_XML_TEMPLATE.format( - health_substatus=substatus, health_description=description) + 
health_substatus=escape(substatus), + health_description=escape( + description[:self.HEALTH_REPORT_DESCRIPTION_TRIM_LEN])) health_report = self.HEALTH_REPORT_XML_TEMPLATE.format( - incarnation=incarnation, - container_id=container_id, - instance_id=instance_id, - health_status=status, + incarnation=escape(str(incarnation)), + container_id=escape(container_id), + instance_id=escape(instance_id), + health_status=escape(status), health_detail_subsection=health_detail) return health_report @@ -797,6 +835,20 @@ class WALinuxAgentShim: health_reporter.send_ready_signal() return {'public-keys': ssh_keys} + @azure_ds_telemetry_reporter + def register_with_azure_and_report_failure(self, description: str) -> None: + """Gets the VM's GoalState from Azure, uses the GoalState information + to report failure/send provisioning failure signal to Azure. + + @param: user visible error description of provisioning failure. + """ + if self.azure_endpoint_client is None: + self.azure_endpoint_client = AzureEndpointHttpClient(None) + goal_state = self._fetch_goal_state_from_azure(need_certificate=False) + health_reporter = GoalStateHealthReporter( + goal_state, self.azure_endpoint_client, self.endpoint) + health_reporter.send_failure_signal(description=description) + @azure_ds_telemetry_reporter def _fetch_goal_state_from_azure( self, @@ -804,6 +856,7 @@ class WALinuxAgentShim: """Fetches the GoalState XML from the Azure endpoint, parses the XML, and returns a GoalState object. + @param need_certificate: switch to know if certificates is needed. @return: GoalState object representing the GoalState XML """ unparsed_goal_state_xml = self._get_raw_goal_state_xml_from_azure() @@ -844,6 +897,7 @@ class WALinuxAgentShim: """Parses a GoalState XML string and returns a GoalState object. @param unparsed_goal_state_xml: GoalState XML string + @param need_certificate: switch to know if certificates is needed. 
@return: GoalState object representing the GoalState XML """ try: @@ -942,6 +996,20 @@ def get_metadata_from_fabric(fallback_lease_file=None, dhcp_opts=None, shim.clean_up() +@azure_ds_telemetry_reporter +def report_failure_to_fabric(fallback_lease_file=None, dhcp_opts=None, + description=None): + shim = WALinuxAgentShim(fallback_lease_file=fallback_lease_file, + dhcp_options=dhcp_opts) + if not description: + description = DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE + try: + shim.register_with_azure_and_report_failure( + description=description) + finally: + shim.clean_up() + + def dhcp_log_cb(out, err): report_diagnostic_event( "dhclient output stream: %s" % out, logger_func=LOG.debug) diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 433fbc66..d9752ab7 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -461,6 +461,8 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): class TestAzureDataSource(CiTestCase): + with_logs = True + def setUp(self): super(TestAzureDataSource, self).setUp() self.tmp = self.tmp_dir() @@ -549,9 +551,12 @@ scbus-1 on xpt0 bus 0 dsaz.BUILTIN_DS_CONFIG['data_dir'] = self.waagent_d - self.get_metadata_from_fabric = mock.MagicMock(return_value={ - 'public-keys': [], - }) + self.m_is_platform_viable = mock.MagicMock(autospec=True) + self.m_get_metadata_from_fabric = mock.MagicMock( + return_value={'public-keys': []}) + self.m_report_failure_to_fabric = mock.MagicMock(autospec=True) + self.m_ephemeral_dhcpv4 = mock.MagicMock() + self.m_ephemeral_dhcpv4_with_reporting = mock.MagicMock() self.instance_id = 'D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8' @@ -568,7 +573,17 @@ scbus-1 on xpt0 bus 0 (dsaz, 'perform_hostname_bounce', mock.MagicMock()), (dsaz, 'get_hostname', mock.MagicMock()), (dsaz, 'set_hostname', mock.MagicMock()), - (dsaz, 'get_metadata_from_fabric', self.get_metadata_from_fabric), + (dsaz, '_is_platform_viable', + 
self.m_is_platform_viable), + (dsaz, 'get_metadata_from_fabric', + self.m_get_metadata_from_fabric), + (dsaz, 'report_failure_to_fabric', + self.m_report_failure_to_fabric), + (dsaz, 'EphemeralDHCPv4', self.m_ephemeral_dhcpv4), + (dsaz, 'EphemeralDHCPv4WithReporting', + self.m_ephemeral_dhcpv4_with_reporting), + (dsaz, 'get_boot_telemetry', mock.MagicMock()), + (dsaz, 'get_system_info', mock.MagicMock()), (dsaz.subp, 'which', lambda x: True), (dsaz.dmi, 'read_dmi_data', mock.MagicMock( side_effect=_dmi_mocks)), @@ -632,15 +647,87 @@ scbus-1 on xpt0 bus 0 dev = ds.get_resource_disk_on_freebsd(1) self.assertEqual("da1", dev) - @mock.patch(MOCKPATH + '_is_platform_viable') - def test_call_is_platform_viable_seed(self, m_is_platform_viable): + def test_not_is_platform_viable_seed_should_return_no_datasource(self): """Check seed_dir using _is_platform_viable and return False.""" # Return a non-matching asset tag value - m_is_platform_viable.return_value = False - dsrc = dsaz.DataSourceAzure( - {}, distro=mock.Mock(), paths=self.paths) - self.assertFalse(dsrc.get_data()) - m_is_platform_viable.assert_called_with(dsrc.seed_dir) + data = {} + dsrc = self._get_ds(data) + self.m_is_platform_viable.return_value = False + with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata, \ + mock.patch.object(dsrc, '_report_failure') as m_report_failure: + ret = dsrc.get_data() + self.m_is_platform_viable.assert_called_with(dsrc.seed_dir) + self.assertFalse(ret) + self.assertNotIn('agent_invoked', data) + # Assert that for non viable platforms, + # there is no communication with the Azure datasource. 
+ self.assertEqual( + 0, + m_crawl_metadata.call_count) + self.assertEqual( + 0, + m_report_failure.call_count) + + def test_platform_viable_but_no_devs_should_return_no_datasource(self): + """For platforms where the Azure platform is viable + (which is indicated by the matching asset tag), + the absence of any devs at all (devs == candidate sources + for crawling Azure datasource) is NOT expected. + Report failure to Azure as this is an unexpected fatal error. + """ + data = {} + dsrc = self._get_ds(data) + with mock.patch.object(dsrc, '_report_failure') as m_report_failure: + self.m_is_platform_viable.return_value = True + ret = dsrc.get_data() + self.m_is_platform_viable.assert_called_with(dsrc.seed_dir) + self.assertFalse(ret) + self.assertNotIn('agent_invoked', data) + self.assertEqual( + 1, + m_report_failure.call_count) + + def test_crawl_metadata_exception_returns_no_datasource(self): + data = {} + dsrc = self._get_ds(data) + self.m_is_platform_viable.return_value = True + with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata: + m_crawl_metadata.side_effect = Exception + ret = dsrc.get_data() + self.m_is_platform_viable.assert_called_with(dsrc.seed_dir) + self.assertEqual( + 1, + m_crawl_metadata.call_count) + self.assertFalse(ret) + self.assertNotIn('agent_invoked', data) + + def test_crawl_metadata_exception_should_report_failure_with_msg(self): + data = {} + dsrc = self._get_ds(data) + self.m_is_platform_viable.return_value = True + with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata, \ + mock.patch.object(dsrc, '_report_failure') as m_report_failure: + m_crawl_metadata.side_effect = Exception + dsrc.get_data() + self.assertEqual( + 1, + m_crawl_metadata.call_count) + m_report_failure.assert_called_once_with( + description=dsaz.DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE) + + def test_crawl_metadata_exc_should_log_could_not_crawl_msg(self): + data = {} + dsrc = self._get_ds(data) + self.m_is_platform_viable.return_value = True 
+ with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata: + m_crawl_metadata.side_effect = Exception + dsrc.get_data() + self.assertEqual( + 1, + m_crawl_metadata.call_count) + self.assertIn( + "Could not crawl Azure metadata", + self.logs.getvalue()) def test_basic_seed_dir(self): odata = {'HostName': "myhost", 'UserName': "myuser"} @@ -761,7 +848,7 @@ scbus-1 on xpt0 bus 0 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready') @mock.patch('cloudinit.sources.DataSourceAzure.DataSourceAzure._poll_imds') def test_crawl_metadata_on_reprovision_reports_ready( - self, poll_imds_func, report_ready_func, m_write, m_dhcp + self, poll_imds_func, m_report_ready, m_write, m_dhcp ): """If reprovisioning, report ready at the end""" ovfenv = construct_valid_ovf_env( @@ -775,18 +862,16 @@ scbus-1 on xpt0 bus 0 dsrc = self._get_ds(data) poll_imds_func.return_value = ovfenv dsrc.crawl_metadata() - self.assertEqual(1, report_ready_func.call_count) + self.assertEqual(1, m_report_ready.call_count) @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file') @mock.patch('cloudinit.sources.helpers.netlink.' 
'wait_for_media_disconnect_connect') @mock.patch( 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready') - @mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network') - @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') @mock.patch('cloudinit.sources.DataSourceAzure.readurl') def test_crawl_metadata_on_reprovision_reports_ready_using_lease( - self, m_readurl, m_dhcp, m_net, report_ready_func, + self, m_readurl, m_report_ready, m_media_switch, m_write ): """If reprovisioning, report ready using the obtained lease""" @@ -800,20 +885,30 @@ scbus-1 on xpt0 bus 0 } dsrc = self._get_ds(data) - lease = { - 'interface': 'eth9', 'fixed-address': '192.168.2.9', - 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', - 'unknown-245': '624c3620'} - m_dhcp.return_value = [lease] - m_media_switch.return_value = None + with mock.patch.object(dsrc.distro.networking, 'is_up') \ + as m_dsrc_distro_networking_is_up: - reprovision_ovfenv = construct_valid_ovf_env() - m_readurl.return_value = url_helper.StringResponse( - reprovision_ovfenv.encode('utf-8')) + # For this mock, net should not be up, + # so that cached ephemeral won't be used. + # This is so that a NEW ephemeral dhcp lease will be discovered + # and used instead. 
+ m_dsrc_distro_networking_is_up.return_value = False - dsrc.crawl_metadata() - self.assertEqual(2, report_ready_func.call_count) - report_ready_func.assert_called_with(lease=lease) + lease = { + 'interface': 'eth9', 'fixed-address': '192.168.2.9', + 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', + 'unknown-245': '624c3620'} + self.m_ephemeral_dhcpv4_with_reporting.return_value \ + .__enter__.return_value = lease + m_media_switch.return_value = None + + reprovision_ovfenv = construct_valid_ovf_env() + m_readurl.return_value = url_helper.StringResponse( + reprovision_ovfenv.encode('utf-8')) + + dsrc.crawl_metadata() + self.assertEqual(2, m_report_ready.call_count) + m_report_ready.assert_called_with(lease=lease) def test_waagent_d_has_0700_perms(self): # we expect /var/lib/waagent to be created 0700 @@ -971,7 +1066,7 @@ scbus-1 on xpt0 bus 0 dsrc = self._get_ds(data) ret = dsrc.get_data() self.assertTrue(ret) - self.assertTrue('default_user' in dsrc.cfg['system_info']) + self.assertIn('default_user', dsrc.cfg['system_info']) defuser = dsrc.cfg['system_info']['default_user'] # default user should be updated username and should not be locked. @@ -993,7 +1088,7 @@ scbus-1 on xpt0 bus 0 dsrc = self._get_ds(data) ret = dsrc.get_data() self.assertTrue(ret) - self.assertTrue('default_user' in dsrc.cfg['system_info']) + self.assertIn('default_user', dsrc.cfg['system_info']) defuser = dsrc.cfg['system_info']['default_user'] # default user should be updated username and should not be locked. 
@@ -1021,14 +1116,6 @@ scbus-1 on xpt0 bus 0 self.assertTrue(ret) self.assertEqual(dsrc.userdata_raw, mydata.encode('utf-8')) - def test_no_datasource_expected(self): - # no source should be found if no seed_dir and no devs - data = {} - dsrc = self._get_ds({}) - ret = dsrc.get_data() - self.assertFalse(ret) - self.assertFalse('agent_invoked' in data) - def test_cfg_has_pubkeys_fingerprint(self): odata = {'HostName': "myhost", 'UserName': "myuser"} mypklist = [{'fingerprint': 'fp1', 'path': 'path1', 'value': ''}] @@ -1171,21 +1258,168 @@ scbus-1 on xpt0 bus 0 self): dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) dsrc.ds_cfg['agent_command'] = '__builtin__' - self.get_metadata_from_fabric.side_effect = Exception + self.m_get_metadata_from_fabric.side_effect = Exception self.assertFalse(dsrc._report_ready(lease=mock.MagicMock())) + def test_dsaz_report_failure_returns_true_when_report_succeeds(self): + dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) + dsrc.ds_cfg['agent_command'] = '__builtin__' + + with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata: + # mock crawl metadata failure to cause report failure + m_crawl_metadata.side_effect = Exception + + self.assertTrue(dsrc._report_failure()) + self.assertEqual( + 1, + self.m_report_failure_to_fabric.call_count) + + def test_dsaz_report_failure_returns_false_and_does_not_propagate_exc( + self): + dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) + dsrc.ds_cfg['agent_command'] = '__builtin__' + + with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata, \ + mock.patch.object(dsrc, '_ephemeral_dhcp_ctx') \ + as m_ephemeral_dhcp_ctx, \ + mock.patch.object(dsrc.distro.networking, 'is_up') \ + as m_dsrc_distro_networking_is_up: + # mock crawl metadata failure to cause report failure + m_crawl_metadata.side_effect = Exception + + # setup mocks to allow using cached ephemeral dhcp lease + m_dsrc_distro_networking_is_up.return_value = True + 
test_lease_dhcp_option_245 = 'test_lease_dhcp_option_245' + test_lease = {'unknown-245': test_lease_dhcp_option_245} + m_ephemeral_dhcp_ctx.lease = test_lease + + # We expect 3 calls to report_failure_to_fabric, + # because we try 3 different methods of calling report failure. + # The different methods are attempted in the following order: + # 1. Using cached ephemeral dhcp context to report failure to Azure + # 2. Using new ephemeral dhcp to report failure to Azure + # 3. Using fallback lease to report failure to Azure + self.m_report_failure_to_fabric.side_effect = Exception + self.assertFalse(dsrc._report_failure()) + self.assertEqual( + 3, + self.m_report_failure_to_fabric.call_count) + + def test_dsaz_report_failure_description_msg(self): + dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) + dsrc.ds_cfg['agent_command'] = '__builtin__' + + with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata: + # mock crawl metadata failure to cause report failure + m_crawl_metadata.side_effect = Exception + + test_msg = 'Test report failure description message' + self.assertTrue(dsrc._report_failure(description=test_msg)) + self.m_report_failure_to_fabric.assert_called_once_with( + dhcp_opts=mock.ANY, description=test_msg) + + def test_dsaz_report_failure_no_description_msg(self): + dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) + dsrc.ds_cfg['agent_command'] = '__builtin__' + + with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata: + m_crawl_metadata.side_effect = Exception + + self.assertTrue(dsrc._report_failure()) # no description msg + self.m_report_failure_to_fabric.assert_called_once_with( + dhcp_opts=mock.ANY, description=None) + + def test_dsaz_report_failure_uses_cached_ephemeral_dhcp_ctx_lease(self): + dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) + dsrc.ds_cfg['agent_command'] = '__builtin__' + + with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata, \ + mock.patch.object(dsrc, 
'_ephemeral_dhcp_ctx') \ + as m_ephemeral_dhcp_ctx, \ + mock.patch.object(dsrc.distro.networking, 'is_up') \ + as m_dsrc_distro_networking_is_up: + # mock crawl metadata failure to cause report failure + m_crawl_metadata.side_effect = Exception + + # setup mocks to allow using cached ephemeral dhcp lease + m_dsrc_distro_networking_is_up.return_value = True + test_lease_dhcp_option_245 = 'test_lease_dhcp_option_245' + test_lease = {'unknown-245': test_lease_dhcp_option_245} + m_ephemeral_dhcp_ctx.lease = test_lease + + self.assertTrue(dsrc._report_failure()) + + # ensure called with cached ephemeral dhcp lease option 245 + self.m_report_failure_to_fabric.assert_called_once_with( + description=mock.ANY, dhcp_opts=test_lease_dhcp_option_245) + + # ensure cached ephemeral is cleaned + self.assertEqual( + 1, + m_ephemeral_dhcp_ctx.clean_network.call_count) + + def test_dsaz_report_failure_no_net_uses_new_ephemeral_dhcp_lease(self): + dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) + dsrc.ds_cfg['agent_command'] = '__builtin__' + + with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata, \ + mock.patch.object(dsrc.distro.networking, 'is_up') \ + as m_dsrc_distro_networking_is_up: + # mock crawl metadata failure to cause report failure + m_crawl_metadata.side_effect = Exception + + # net is not up and cannot use cached ephemeral dhcp + m_dsrc_distro_networking_is_up.return_value = False + # setup ephemeral dhcp lease discovery mock + test_lease_dhcp_option_245 = 'test_lease_dhcp_option_245' + test_lease = {'unknown-245': test_lease_dhcp_option_245} + self.m_ephemeral_dhcpv4_with_reporting.return_value \ + .__enter__.return_value = test_lease + + self.assertTrue(dsrc._report_failure()) + + # ensure called with the newly discovered + # ephemeral dhcp lease option 245 + self.m_report_failure_to_fabric.assert_called_once_with( + description=mock.ANY, dhcp_opts=test_lease_dhcp_option_245) + + def 
test_dsaz_report_failure_no_net_and_no_dhcp_uses_fallback_lease( + self): + dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) + dsrc.ds_cfg['agent_command'] = '__builtin__' + + with mock.patch.object(dsrc, 'crawl_metadata') as m_crawl_metadata, \ + mock.patch.object(dsrc.distro.networking, 'is_up') \ + as m_dsrc_distro_networking_is_up: + # mock crawl metadata failure to cause report failure + m_crawl_metadata.side_effect = Exception + + # net is not up and cannot use cached ephemeral dhcp + m_dsrc_distro_networking_is_up.return_value = False + # ephemeral dhcp discovery failure, + # so cannot use a new ephemeral dhcp + self.m_ephemeral_dhcpv4_with_reporting.return_value \ + .__enter__.side_effect = Exception + + self.assertTrue(dsrc._report_failure()) + + # ensure called with fallback lease + self.m_report_failure_to_fabric.assert_called_once_with( + description=mock.ANY, + fallback_lease_file=dsrc.dhclient_lease_file) + def test_exception_fetching_fabric_data_doesnt_propagate(self): """Errors communicating with fabric should warn, but return True.""" dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) dsrc.ds_cfg['agent_command'] = '__builtin__' - self.get_metadata_from_fabric.side_effect = Exception + self.m_get_metadata_from_fabric.side_effect = Exception ret = self._get_and_setup(dsrc) self.assertTrue(ret) def test_fabric_data_included_in_metadata(self): dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) dsrc.ds_cfg['agent_command'] = '__builtin__' - self.get_metadata_from_fabric.return_value = {'test': 'value'} + self.m_get_metadata_from_fabric.return_value = {'test': 'value'} ret = self._get_and_setup(dsrc) self.assertTrue(ret) self.assertEqual('value', dsrc.metadata['test']) @@ -2053,7 +2287,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): @mock.patch('time.sleep', mock.MagicMock()) @mock.patch(MOCKPATH + 'EphemeralDHCPv4') - def test_poll_imds_re_dhcp_on_timeout(self, m_dhcpv4, report_ready_func, + def 
test_poll_imds_re_dhcp_on_timeout(self, m_dhcpv4, m_report_ready, m_request, m_media_switch, m_dhcp, m_net): """The poll_imds will retry DHCP on IMDS timeout.""" @@ -2088,8 +2322,8 @@ class TestPreprovisioningPollIMDS(CiTestCase): dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file): dsa._poll_imds() - self.assertEqual(report_ready_func.call_count, 1) - report_ready_func.assert_called_with(lease=lease) + self.assertEqual(m_report_ready.call_count, 1) + m_report_ready.assert_called_with(lease=lease) self.assertEqual(3, m_dhcpv4.call_count, 'Expected 3 DHCP calls') self.assertEqual(4, self.tries, 'Expected 4 total reads from IMDS') diff --git a/tests/unittests/test_datasource/test_azure_helper.py b/tests/unittests/test_datasource/test_azure_helper.py index 6e004e34..adf68857 100644 --- a/tests/unittests/test_datasource/test_azure_helper.py +++ b/tests/unittests/test_datasource/test_azure_helper.py @@ -5,6 +5,7 @@ import re import unittest from textwrap import dedent from xml.etree import ElementTree +from xml.sax.saxutils import escape, unescape from cloudinit.sources.helpers import azure as azure_helper from cloudinit.tests.helpers import CiTestCase, ExitStack, mock, populate_dir @@ -70,6 +71,15 @@ HEALTH_REPORT_XML_TEMPLATE = '''\ ''' +HEALTH_DETAIL_SUBSECTION_XML_TEMPLATE = dedent('''\ +
+ {health_substatus} + {health_description} +
+ ''') + +HEALTH_REPORT_DESCRIPTION_TRIM_LEN = 512 + class SentinelException(Exception): pass @@ -461,17 +471,24 @@ class TestOpenSSLManagerActions(CiTestCase): class TestGoalStateHealthReporter(CiTestCase): + maxDiff = None + default_parameters = { 'incarnation': 1634, 'container_id': 'MyContainerId', 'instance_id': 'MyInstanceId' } - test_endpoint = 'TestEndpoint' - test_url = 'http://{0}/machine?comp=health'.format(test_endpoint) + test_azure_endpoint = 'TestEndpoint' + test_health_report_url = 'http://{0}/machine?comp=health'.format( + test_azure_endpoint) test_default_headers = {'Content-Type': 'text/xml; charset=utf-8'} provisioning_success_status = 'Ready' + provisioning_not_ready_status = 'NotReady' + provisioning_failure_substatus = 'ProvisioningFailed' + provisioning_failure_err_description = ( + 'Test error message containing provisioning failure details') def setUp(self): super(TestGoalStateHealthReporter, self).setUp() @@ -496,17 +513,40 @@ class TestGoalStateHealthReporter(CiTestCase): self.GoalState.return_value.incarnation = \ self.default_parameters['incarnation'] + def _text_from_xpath_in_xroot(self, xroot, xpath): + element = xroot.find(xpath) + if element is not None: + return element.text + return None + def _get_formatted_health_report_xml_string(self, **kwargs): return HEALTH_REPORT_XML_TEMPLATE.format(**kwargs) + def _get_formatted_health_detail_subsection_xml_string(self, **kwargs): + return HEALTH_DETAIL_SUBSECTION_XML_TEMPLATE.format(**kwargs) + def _get_report_ready_health_document(self): return self._get_formatted_health_report_xml_string( - incarnation=self.default_parameters['incarnation'], - container_id=self.default_parameters['container_id'], - instance_id=self.default_parameters['instance_id'], - health_status=self.provisioning_success_status, + incarnation=escape(str(self.default_parameters['incarnation'])), + container_id=escape(self.default_parameters['container_id']), + 
instance_id=escape(self.default_parameters['instance_id']), + health_status=escape(self.provisioning_success_status), health_detail_subsection='') + def _get_report_failure_health_document(self): + health_detail_subsection = \ + self._get_formatted_health_detail_subsection_xml_string( + health_substatus=escape(self.provisioning_failure_substatus), + health_description=escape( + self.provisioning_failure_err_description)) + + return self._get_formatted_health_report_xml_string( + incarnation=escape(str(self.default_parameters['incarnation'])), + container_id=escape(self.default_parameters['container_id']), + instance_id=escape(self.default_parameters['instance_id']), + health_status=escape(self.provisioning_not_ready_status), + health_detail_subsection=health_detail_subsection) + def test_send_ready_signal_sends_post_request(self): with mock.patch.object( azure_helper.GoalStateHealthReporter, @@ -514,55 +554,130 @@ class TestGoalStateHealthReporter(CiTestCase): client = azure_helper.AzureEndpointHttpClient(mock.MagicMock()) reporter = azure_helper.GoalStateHealthReporter( azure_helper.GoalState(mock.MagicMock(), mock.MagicMock()), - client, self.test_endpoint) + client, self.test_azure_endpoint) reporter.send_ready_signal() self.assertEqual(1, self.post.call_count) self.assertEqual( mock.call( - self.test_url, + self.test_health_report_url, + data=m_build_report.return_value, + extra_headers=self.test_default_headers), + self.post.call_args) + + def test_send_failure_signal_sends_post_request(self): + with mock.patch.object( + azure_helper.GoalStateHealthReporter, + 'build_report') as m_build_report: + client = azure_helper.AzureEndpointHttpClient(mock.MagicMock()) + reporter = azure_helper.GoalStateHealthReporter( + azure_helper.GoalState(mock.MagicMock(), mock.MagicMock()), + client, self.test_azure_endpoint) + reporter.send_failure_signal( + description=self.provisioning_failure_err_description) + + self.assertEqual(1, self.post.call_count) + self.assertEqual( + 
mock.call( + self.test_health_report_url, data=m_build_report.return_value, extra_headers=self.test_default_headers), self.post.call_args) - def test_build_report_for_health_document(self): + def test_build_report_for_ready_signal_health_document(self): health_document = self._get_report_ready_health_document() reporter = azure_helper.GoalStateHealthReporter( azure_helper.GoalState(mock.MagicMock(), mock.MagicMock()), azure_helper.AzureEndpointHttpClient(mock.MagicMock()), - self.test_endpoint) + self.test_azure_endpoint) generated_health_document = reporter.build_report( incarnation=self.default_parameters['incarnation'], container_id=self.default_parameters['container_id'], instance_id=self.default_parameters['instance_id'], status=self.provisioning_success_status) + self.assertEqual(health_document, generated_health_document) - self.assertIn( - '{}'.format( - str(self.default_parameters['incarnation'])), - generated_health_document) - self.assertIn( - ''.join([ - '', - self.default_parameters['container_id'], - '']), - generated_health_document) - self.assertIn( - ''.join([ - '', - self.default_parameters['instance_id'], - '']), - generated_health_document) - self.assertIn( - ''.join([ - '', - self.provisioning_success_status, - '']), - generated_health_document + + generated_xroot = ElementTree.fromstring(generated_health_document) + self.assertEqual( + self._text_from_xpath_in_xroot( + generated_xroot, './GoalStateIncarnation'), + str(self.default_parameters['incarnation'])) + self.assertEqual( + self._text_from_xpath_in_xroot( + generated_xroot, './Container/ContainerId'), + str(self.default_parameters['container_id'])) + self.assertEqual( + self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/InstanceId'), + str(self.default_parameters['instance_id'])) + self.assertEqual( + self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/Health/State'), + escape(self.provisioning_success_status)) + 
self.assertIsNone( + self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/Health/Details')) + self.assertIsNone( + self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/Health/Details/SubStatus')) + self.assertIsNone( + self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/Health/Details/Description') ) - self.assertNotIn('
', generated_health_document) - self.assertNotIn('', generated_health_document) - self.assertNotIn('', generated_health_document) + + def test_build_report_for_failure_signal_health_document(self): + health_document = self._get_report_failure_health_document() + reporter = azure_helper.GoalStateHealthReporter( + azure_helper.GoalState(mock.MagicMock(), mock.MagicMock()), + azure_helper.AzureEndpointHttpClient(mock.MagicMock()), + self.test_azure_endpoint) + generated_health_document = reporter.build_report( + incarnation=self.default_parameters['incarnation'], + container_id=self.default_parameters['container_id'], + instance_id=self.default_parameters['instance_id'], + status=self.provisioning_not_ready_status, + substatus=self.provisioning_failure_substatus, + description=self.provisioning_failure_err_description) + + self.assertEqual(health_document, generated_health_document) + + generated_xroot = ElementTree.fromstring(generated_health_document) + self.assertEqual( + self._text_from_xpath_in_xroot( + generated_xroot, './GoalStateIncarnation'), + str(self.default_parameters['incarnation'])) + self.assertEqual( + self._text_from_xpath_in_xroot( + generated_xroot, './Container/ContainerId'), + self.default_parameters['container_id']) + self.assertEqual( + self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/InstanceId'), + self.default_parameters['instance_id']) + self.assertEqual( + self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/Health/State'), + escape(self.provisioning_not_ready_status)) + self.assertEqual( + self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/Health/Details/' + 'SubStatus'), + escape(self.provisioning_failure_substatus)) + self.assertEqual( + self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/Health/Details/' + 'Description'), + escape(self.provisioning_failure_err_description)) def 
test_send_ready_signal_calls_build_report(self): with mock.patch.object( @@ -571,7 +686,7 @@ class TestGoalStateHealthReporter(CiTestCase): reporter = azure_helper.GoalStateHealthReporter( azure_helper.GoalState(mock.MagicMock(), mock.MagicMock()), azure_helper.AzureEndpointHttpClient(mock.MagicMock()), - self.test_endpoint) + self.test_azure_endpoint) reporter.send_ready_signal() self.assertEqual(1, m_build_report.call_count) @@ -583,6 +698,131 @@ class TestGoalStateHealthReporter(CiTestCase): status=self.provisioning_success_status), m_build_report.call_args) + def test_send_failure_signal_calls_build_report(self): + with mock.patch.object( + azure_helper.GoalStateHealthReporter, 'build_report' + ) as m_build_report: + reporter = azure_helper.GoalStateHealthReporter( + azure_helper.GoalState(mock.MagicMock(), mock.MagicMock()), + azure_helper.AzureEndpointHttpClient(mock.MagicMock()), + self.test_azure_endpoint) + reporter.send_failure_signal( + description=self.provisioning_failure_err_description) + + self.assertEqual(1, m_build_report.call_count) + self.assertEqual( + mock.call( + incarnation=self.default_parameters['incarnation'], + container_id=self.default_parameters['container_id'], + instance_id=self.default_parameters['instance_id'], + status=self.provisioning_not_ready_status, + substatus=self.provisioning_failure_substatus, + description=self.provisioning_failure_err_description), + m_build_report.call_args) + + def test_build_report_escapes_chars(self): + incarnation = 'jd8\'9*&^<\'A>' + instance_id = 'Opo>>>jas\'&d;[p&fp\"a<&aa\'sd!@&!)((*<&>' + health_substatus = '&as\"d<d<\'^@!5&6<7' + health_description = '&&&>!#$\"&&><>&\"sd<67<]>>' + + health_detail_subsection = \ + self._get_formatted_health_detail_subsection_xml_string( + health_substatus=escape(health_substatus), + health_description=escape(health_description)) + health_document = self._get_formatted_health_report_xml_string( + incarnation=escape(incarnation), + 
container_id=escape(container_id), + instance_id=escape(instance_id), + health_status=escape(health_status), + health_detail_subsection=health_detail_subsection) + + reporter = azure_helper.GoalStateHealthReporter( + azure_helper.GoalState(mock.MagicMock(), mock.MagicMock()), + azure_helper.AzureEndpointHttpClient(mock.MagicMock()), + self.test_azure_endpoint) + generated_health_document = reporter.build_report( + incarnation=incarnation, + container_id=container_id, + instance_id=instance_id, + status=health_status, + substatus=health_substatus, + description=health_description) + + self.assertEqual(health_document, generated_health_document) + + def test_build_report_conforms_to_length_limits(self): + reporter = azure_helper.GoalStateHealthReporter( + azure_helper.GoalState(mock.MagicMock(), mock.MagicMock()), + azure_helper.AzureEndpointHttpClient(mock.MagicMock()), + self.test_azure_endpoint) + long_err_msg = 'a9&ea8>>>e as1< d\"q2*&(^%\'a=5<' * 100 + generated_health_document = reporter.build_report( + incarnation=self.default_parameters['incarnation'], + container_id=self.default_parameters['container_id'], + instance_id=self.default_parameters['instance_id'], + status=self.provisioning_not_ready_status, + substatus=self.provisioning_failure_substatus, + description=long_err_msg) + + generated_xroot = ElementTree.fromstring(generated_health_document) + generated_health_report_description = self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/Health/Details/Description') + self.assertEqual( + len(unescape(generated_health_report_description)), + HEALTH_REPORT_DESCRIPTION_TRIM_LEN) + + def test_trim_description_then_escape_conforms_to_len_limits_worst_case( + self): + """When unescaped characters are XML-escaped, the length increases. + Char Escape String + < < + > > + " " + ' ' + & & + + We (step 1) trim the health report XML's description field, + and then (step 2) XML-escape the health report XML's description field. 
+ + The health report XML's description field limit within cloud-init + is HEALTH_REPORT_DESCRIPTION_TRIM_LEN. + + The Azure platform's limit on the health report XML's description field + is 4096 chars. + + For worst-case chars, there is a 5x blowup in length + when the chars are XML-escaped. + ' and " when XML-escaped have a 5x blowup. + + Ensure that (1) trimming and then (2) XML-escaping does not blow past + the Azure platform's limit for health report XML's description field + (4096 chars). + """ + reporter = azure_helper.GoalStateHealthReporter( + azure_helper.GoalState(mock.MagicMock(), mock.MagicMock()), + azure_helper.AzureEndpointHttpClient(mock.MagicMock()), + self.test_azure_endpoint) + long_err_msg = '\'\"' * 10000 + generated_health_document = reporter.build_report( + incarnation=self.default_parameters['incarnation'], + container_id=self.default_parameters['container_id'], + instance_id=self.default_parameters['instance_id'], + status=self.provisioning_not_ready_status, + substatus=self.provisioning_failure_substatus, + description=long_err_msg) + + generated_xroot = ElementTree.fromstring(generated_health_document) + generated_health_report_description = self._text_from_xpath_in_xroot( + generated_xroot, + './Container/RoleInstanceList/Role/Health/Details/Description') + # The escaped description string should be less than + # the Azure platform limit for the escaped description string. 
+ self.assertLessEqual(len(generated_health_report_description), 4096) + class TestWALinuxAgentShim(CiTestCase): @@ -598,7 +838,7 @@ class TestWALinuxAgentShim(CiTestCase): self.GoalState = patches.enter_context( mock.patch.object(azure_helper, 'GoalState')) self.OpenSSLManager = patches.enter_context( - mock.patch.object(azure_helper, 'OpenSSLManager')) + mock.patch.object(azure_helper, 'OpenSSLManager', autospec=True)) patches.enter_context( mock.patch.object(azure_helper.time, 'sleep', mock.MagicMock())) @@ -609,24 +849,47 @@ class TestWALinuxAgentShim(CiTestCase): self.GoalState.return_value.container_id = self.test_container_id self.GoalState.return_value.instance_id = self.test_instance_id - def test_http_client_does_not_use_certificate(self): + def test_http_client_does_not_use_certificate_for_report_ready(self): shim = wa_shim() shim.register_with_azure_and_fetch_data() self.assertEqual( [mock.call(None)], self.AzureEndpointHttpClient.call_args_list) + def test_http_client_does_not_use_certificate_for_report_failure(self): + shim = wa_shim() + shim.register_with_azure_and_report_failure(description='TestDesc') + self.assertEqual( + [mock.call(None)], + self.AzureEndpointHttpClient.call_args_list) + def test_correct_url_used_for_goalstate_during_report_ready(self): self.find_endpoint.return_value = 'test_endpoint' shim = wa_shim() shim.register_with_azure_and_fetch_data() - get = self.AzureEndpointHttpClient.return_value.get + m_get = self.AzureEndpointHttpClient.return_value.get + self.assertEqual( + [mock.call('http://test_endpoint/machine/?comp=goalstate')], + m_get.call_args_list) + self.assertEqual( + [mock.call( + m_get.return_value.contents, + self.AzureEndpointHttpClient.return_value, + False + )], + self.GoalState.call_args_list) + + def test_correct_url_used_for_goalstate_during_report_failure(self): + self.find_endpoint.return_value = 'test_endpoint' + shim = wa_shim() + shim.register_with_azure_and_report_failure(description='TestDesc') + m_get = 
self.AzureEndpointHttpClient.return_value.get self.assertEqual( [mock.call('http://test_endpoint/machine/?comp=goalstate')], - get.call_args_list) + m_get.call_args_list) self.assertEqual( [mock.call( - get.return_value.contents, + m_get.return_value.contents, self.AzureEndpointHttpClient.return_value, False )], @@ -670,6 +933,16 @@ class TestWALinuxAgentShim(CiTestCase): self.AzureEndpointHttpClient.return_value.post .call_args_list) + def test_correct_url_used_for_report_failure(self): + self.find_endpoint.return_value = 'test_endpoint' + shim = wa_shim() + shim.register_with_azure_and_report_failure(description='TestDesc') + expected_url = 'http://test_endpoint/machine?comp=health' + self.assertEqual( + [mock.call(expected_url, data=mock.ANY, extra_headers=mock.ANY)], + self.AzureEndpointHttpClient.return_value.post + .call_args_list) + def test_goal_state_values_used_for_report_ready(self): shim = wa_shim() shim.register_with_azure_and_fetch_data() @@ -681,44 +954,128 @@ class TestWALinuxAgentShim(CiTestCase): self.assertIn(self.test_container_id, posted_document) self.assertIn(self.test_instance_id, posted_document) - def test_xml_elems_in_report_ready(self): + def test_goal_state_values_used_for_report_failure(self): + shim = wa_shim() + shim.register_with_azure_and_report_failure(description='TestDesc') + posted_document = ( + self.AzureEndpointHttpClient.return_value.post + .call_args[1]['data'] + ) + self.assertIn(self.test_incarnation, posted_document) + self.assertIn(self.test_container_id, posted_document) + self.assertIn(self.test_instance_id, posted_document) + + def test_xml_elems_in_report_ready_post(self): shim = wa_shim() shim.register_with_azure_and_fetch_data() health_document = HEALTH_REPORT_XML_TEMPLATE.format( - incarnation=self.test_incarnation, - container_id=self.test_container_id, - instance_id=self.test_instance_id, - health_status='Ready', + incarnation=escape(self.test_incarnation), + container_id=escape(self.test_container_id), + 
instance_id=escape(self.test_instance_id), + health_status=escape('Ready'), health_detail_subsection='') posted_document = ( self.AzureEndpointHttpClient.return_value.post .call_args[1]['data']) self.assertEqual(health_document, posted_document) + def test_xml_elems_in_report_failure_post(self): + shim = wa_shim() + shim.register_with_azure_and_report_failure(description='TestDesc') + health_document = HEALTH_REPORT_XML_TEMPLATE.format( + incarnation=escape(self.test_incarnation), + container_id=escape(self.test_container_id), + instance_id=escape(self.test_instance_id), + health_status=escape('NotReady'), + health_detail_subsection=HEALTH_DETAIL_SUBSECTION_XML_TEMPLATE + .format( + health_substatus=escape('ProvisioningFailed'), + health_description=escape('TestDesc'))) + posted_document = ( + self.AzureEndpointHttpClient.return_value.post + .call_args[1]['data']) + self.assertEqual(health_document, posted_document) + + @mock.patch.object(azure_helper, 'GoalStateHealthReporter', autospec=True) + def test_register_with_azure_and_fetch_data_calls_send_ready_signal( + self, m_goal_state_health_reporter): + shim = wa_shim() + shim.register_with_azure_and_fetch_data() + self.assertEqual( + 1, + m_goal_state_health_reporter.return_value.send_ready_signal + .call_count) + + @mock.patch.object(azure_helper, 'GoalStateHealthReporter', autospec=True) + def test_register_with_azure_and_report_failure_calls_send_failure_signal( + self, m_goal_state_health_reporter): + shim = wa_shim() + shim.register_with_azure_and_report_failure(description='TestDesc') + m_goal_state_health_reporter.return_value.send_failure_signal \ + .assert_called_once_with(description='TestDesc') + + def test_register_with_azure_and_report_failure_does_not_need_certificates( + self): + shim = wa_shim() + with mock.patch.object( + shim, '_fetch_goal_state_from_azure', autospec=True + ) as m_fetch_goal_state_from_azure: + shim.register_with_azure_and_report_failure(description='TestDesc') + 
m_fetch_goal_state_from_azure.assert_called_once_with( + need_certificate=False) + def test_clean_up_can_be_called_at_any_time(self): shim = wa_shim() shim.clean_up() + def test_openssl_manager_not_instantiated_by_shim_report_status(self): + shim = wa_shim() + shim.register_with_azure_and_fetch_data() + shim.register_with_azure_and_report_failure(description='TestDesc') + shim.clean_up() + self.OpenSSLManager.assert_not_called() + def test_clean_up_after_report_ready(self): shim = wa_shim() shim.register_with_azure_and_fetch_data() shim.clean_up() - self.assertEqual( - 0, self.OpenSSLManager.return_value.clean_up.call_count) + self.OpenSSLManager.return_value.clean_up.assert_not_called() + + def test_clean_up_after_report_failure(self): + shim = wa_shim() + shim.register_with_azure_and_report_failure(description='TestDesc') + shim.clean_up() + self.OpenSSLManager.return_value.clean_up.assert_not_called() def test_fetch_goalstate_during_report_ready_raises_exc_on_get_exc(self): self.AzureEndpointHttpClient.return_value.get \ - .side_effect = (SentinelException) + .side_effect = SentinelException shim = wa_shim() self.assertRaises(SentinelException, shim.register_with_azure_and_fetch_data) + def test_fetch_goalstate_during_report_failure_raises_exc_on_get_exc(self): + self.AzureEndpointHttpClient.return_value.get \ + .side_effect = SentinelException + shim = wa_shim() + self.assertRaises(SentinelException, + shim.register_with_azure_and_report_failure, + description='TestDesc') + def test_fetch_goalstate_during_report_ready_raises_exc_on_parse_exc(self): self.GoalState.side_effect = SentinelException shim = wa_shim() self.assertRaises(SentinelException, shim.register_with_azure_and_fetch_data) + def test_fetch_goalstate_during_report_failure_raises_exc_on_parse_exc( + self): + self.GoalState.side_effect = SentinelException + shim = wa_shim() + self.assertRaises(SentinelException, + shim.register_with_azure_and_report_failure, + description='TestDesc') + def 
test_failure_to_send_report_ready_health_doc_bubbles_up(self): self.AzureEndpointHttpClient.return_value.post \ .side_effect = SentinelException @@ -726,56 +1083,132 @@ class TestWALinuxAgentShim(CiTestCase): self.assertRaises(SentinelException, shim.register_with_azure_and_fetch_data) + def test_failure_to_send_report_failure_health_doc_bubbles_up(self): + self.AzureEndpointHttpClient.return_value.post \ + .side_effect = SentinelException + shim = wa_shim() + self.assertRaises(SentinelException, + shim.register_with_azure_and_report_failure, + description='TestDesc') + class TestGetMetadataGoalStateXMLAndReportReadyToFabric(CiTestCase): - @mock.patch.object(azure_helper, 'WALinuxAgentShim') - def test_data_from_shim_returned(self, shim): + def setUp(self): + super(TestGetMetadataGoalStateXMLAndReportReadyToFabric, self).setUp() + patches = ExitStack() + self.addCleanup(patches.close) + + self.m_shim = patches.enter_context( + mock.patch.object(azure_helper, 'WALinuxAgentShim')) + + def test_data_from_shim_returned(self): ret = azure_helper.get_metadata_from_fabric() self.assertEqual( - shim.return_value.register_with_azure_and_fetch_data.return_value, + self.m_shim.return_value.register_with_azure_and_fetch_data + .return_value, ret) - @mock.patch.object(azure_helper, 'WALinuxAgentShim') - def test_success_calls_clean_up(self, shim): + def test_success_calls_clean_up(self): azure_helper.get_metadata_from_fabric() - self.assertEqual(1, shim.return_value.clean_up.call_count) + self.assertEqual(1, self.m_shim.return_value.clean_up.call_count) - @mock.patch.object(azure_helper, 'WALinuxAgentShim') def test_failure_in_registration_propagates_exc_and_calls_clean_up( - self, shim): - shim.return_value.register_with_azure_and_fetch_data.side_effect = ( - SentinelException) + self): + self.m_shim.return_value.register_with_azure_and_fetch_data \ + .side_effect = SentinelException self.assertRaises(SentinelException, azure_helper.get_metadata_from_fabric) - 
self.assertEqual(1, shim.return_value.clean_up.call_count) + self.assertEqual(1, self.m_shim.return_value.clean_up.call_count) - @mock.patch.object(azure_helper, 'WALinuxAgentShim') - def test_calls_shim_register_with_azure_and_fetch_data(self, shim): + def test_calls_shim_register_with_azure_and_fetch_data(self): m_pubkey_info = mock.MagicMock() azure_helper.get_metadata_from_fabric(pubkey_info=m_pubkey_info) self.assertEqual( 1, - shim.return_value + self.m_shim.return_value .register_with_azure_and_fetch_data.call_count) self.assertEqual( mock.call(pubkey_info=m_pubkey_info), - shim.return_value + self.m_shim.return_value .register_with_azure_and_fetch_data.call_args) - @mock.patch.object(azure_helper, 'WALinuxAgentShim') - def test_instantiates_shim_with_kwargs(self, shim): + def test_instantiates_shim_with_kwargs(self): m_fallback_lease_file = mock.MagicMock() m_dhcp_options = mock.MagicMock() azure_helper.get_metadata_from_fabric( fallback_lease_file=m_fallback_lease_file, dhcp_opts=m_dhcp_options) - self.assertEqual(1, shim.call_count) + self.assertEqual(1, self.m_shim.call_count) self.assertEqual( mock.call( fallback_lease_file=m_fallback_lease_file, dhcp_options=m_dhcp_options), - shim.call_args) + self.m_shim.call_args) + + +class TestGetMetadataGoalStateXMLAndReportFailureToFabric(CiTestCase): + + def setUp(self): + super( + TestGetMetadataGoalStateXMLAndReportFailureToFabric, self).setUp() + patches = ExitStack() + self.addCleanup(patches.close) + + self.m_shim = patches.enter_context( + mock.patch.object(azure_helper, 'WALinuxAgentShim')) + + def test_success_calls_clean_up(self): + azure_helper.report_failure_to_fabric() + self.assertEqual( + 1, + self.m_shim.return_value.clean_up.call_count) + + def test_failure_in_shim_report_failure_propagates_exc_and_calls_clean_up( + self): + self.m_shim.return_value.register_with_azure_and_report_failure \ + .side_effect = SentinelException + self.assertRaises(SentinelException, + 
azure_helper.report_failure_to_fabric) + self.assertEqual( + 1, + self.m_shim.return_value.clean_up.call_count) + + def test_report_failure_to_fabric_with_desc_calls_shim_report_failure( + self): + azure_helper.report_failure_to_fabric(description='TestDesc') + self.m_shim.return_value.register_with_azure_and_report_failure \ + .assert_called_once_with(description='TestDesc') + + def test_report_failure_to_fabric_with_no_desc_calls_shim_report_failure( + self): + azure_helper.report_failure_to_fabric() + # default err message description should be shown to the user + # if no description is passed in + self.m_shim.return_value.register_with_azure_and_report_failure \ + .assert_called_once_with( + description=azure_helper + .DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE) + + def test_report_failure_to_fabric_empty_desc_calls_shim_report_failure( + self): + azure_helper.report_failure_to_fabric(description='') + # default err message description should be shown to the user + # if an empty description is passed in + self.m_shim.return_value.register_with_azure_and_report_failure \ + .assert_called_once_with( + description=azure_helper + .DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE) + + def test_instantiates_shim_with_kwargs(self): + m_fallback_lease_file = mock.MagicMock() + m_dhcp_options = mock.MagicMock() + azure_helper.report_failure_to_fabric( + fallback_lease_file=m_fallback_lease_file, + dhcp_opts=m_dhcp_options) + self.m_shim.assert_called_once_with( + fallback_lease_file=m_fallback_lease_file, + dhcp_options=m_dhcp_options) class TestExtractIpAddressFromNetworkd(CiTestCase): -- cgit v1.2.3 From eea754492f074e00b601cf77aa278e3623857c5a Mon Sep 17 00:00:00 2001 From: Anh Vo Date: Thu, 19 Nov 2020 00:35:46 -0500 Subject: DataSourceAzure: update password for defuser if exists (#671) cc_set_password will only update the password for the default user if cfg['password'] is set. 
The existing code of datasource Azure will fail to update the default user's password because it does not set that metadata. If the default user doesn't exist in the image, the current code works fine because the password is set during user create and not in cc_set_password --- cloudinit/sources/DataSourceAzure.py | 2 +- tests/unittests/test_datasource/test_azure.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'tests/unittests/test_datasource/test_azure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index ab139b8d..f777a007 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -1391,7 +1391,7 @@ def read_azure_ovf(contents): if password: defuser['lock_passwd'] = False if DEF_PASSWD_REDACTION != password: - defuser['passwd'] = encrypt_pass(password) + defuser['passwd'] = cfg['password'] = encrypt_pass(password) if defuser: cfg['system_info'] = {'default_user': defuser} diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index d9752ab7..534314aa 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -1080,6 +1080,9 @@ scbus-1 on xpt0 bus 0 crypt.crypt(odata['UserPassword'], defuser['passwd'][0:pos])) + # the same hashed value should also be present in cfg['password'] + self.assertEqual(defuser['passwd'], dsrc.cfg['password']) + def test_user_not_locked_if_password_redacted(self): odata = {'HostName': "myhost", 'UserName': "myuser", 'UserPassword': dsaz.DEF_PASSWD_REDACTION} -- cgit v1.2.3 From a4d0feb050e32277a218e45bfb6a496d26ff46d0 Mon Sep 17 00:00:00 2001 From: aswinrajamannar <39812128+aswinrajamannar@users.noreply.github.com> Date: Mon, 23 Nov 2020 07:04:05 -0800 Subject: Ability to hot-attach NICs to preprovisioned VMs before reprovisioning (#613) Adds the ability to run the Azure preprovisioned VMs as NIC-less and then hot-attach them 
when assigned for reprovision. The NIC on the preprovisioned VM is hot-detached as soon as it reports ready and goes into wait for one or more interfaces to be hot-attached. Once they are attached, cloud-init gets the expected number of NICs (in case there are more than one) that will be attached from IMDS and waits until all of them are attached. After all the NICs are attached, reprovision proceeds as usual. --- cloudinit/distros/networking.py | 15 + cloudinit/distros/tests/test_networking.py | 31 ++ cloudinit/sources/DataSourceAzure.py | 515 ++++++++++++++++++++++-- cloudinit/sources/helpers/netlink.py | 102 ++++- cloudinit/sources/helpers/tests/test_netlink.py | 74 +++- tests/unittests/test_datasource/test_azure.py | 436 +++++++++++++++++++- tools/.github-cla-signers | 1 + 7 files changed, 1109 insertions(+), 65 deletions(-) (limited to 'tests/unittests/test_datasource/test_azure.py') diff --git a/cloudinit/distros/networking.py b/cloudinit/distros/networking.py index e407fa29..c291196a 100644 --- a/cloudinit/distros/networking.py +++ b/cloudinit/distros/networking.py @@ -2,6 +2,7 @@ import abc import logging import os +from cloudinit import subp from cloudinit import net, util @@ -175,6 +176,10 @@ class Networking(metaclass=abc.ABCMeta): if strict: raise RuntimeError(msg) + @abc.abstractmethod + def try_set_link_up(self, devname: DeviceName) -> bool: + """Try setting the link to up explicitly and return if it is up.""" + class BSDNetworking(Networking): """Implementation of networking functionality shared across BSDs.""" @@ -185,6 +190,9 @@ class BSDNetworking(Networking): def settle(self, *, exists=None) -> None: """BSD has no equivalent to `udevadm settle`; noop.""" + def try_set_link_up(self, devname: DeviceName) -> bool: + raise NotImplementedError() + class LinuxNetworking(Networking): """Implementation of networking functionality common to Linux distros.""" @@ -214,3 +222,10 @@ class LinuxNetworking(Networking): if exists is not None: exists = 
net.sys_dev_path(exists) util.udevadm_settle(exists=exists) + + def try_set_link_up(self, devname: DeviceName) -> bool: + """Try setting the link to up explicitly and return if it is up. + Not guaranteed to bring the interface up. The caller is expected to + add wait times before retrying.""" + subp.subp(['ip', 'link', 'set', devname, 'up']) + return self.is_up(devname) diff --git a/cloudinit/distros/tests/test_networking.py b/cloudinit/distros/tests/test_networking.py index b9a63842..ec508f4d 100644 --- a/cloudinit/distros/tests/test_networking.py +++ b/cloudinit/distros/tests/test_networking.py @@ -30,6 +30,9 @@ def generic_networking_cls(): def settle(self, *args, **kwargs): raise NotImplementedError + def try_set_link_up(self, *args, **kwargs): + raise NotImplementedError + error = AssertionError("Unexpectedly used /sys in generic networking code") with mock.patch( "cloudinit.net.get_sys_class_path", side_effect=error, @@ -74,6 +77,34 @@ class TestLinuxNetworkingIsPhysical: assert LinuxNetworking().is_physical(devname) +class TestBSDNetworkingTrySetLinkUp: + def test_raises_notimplementederror(self): + with pytest.raises(NotImplementedError): + BSDNetworking().try_set_link_up("eth0") + + +@mock.patch("cloudinit.net.is_up") +@mock.patch("cloudinit.distros.networking.subp.subp") +class TestLinuxNetworkingTrySetLinkUp: + def test_calls_subp_return_true(self, m_subp, m_is_up): + devname = "eth0" + m_is_up.return_value = True + is_success = LinuxNetworking().try_set_link_up(devname) + + assert (mock.call(['ip', 'link', 'set', devname, 'up']) == + m_subp.call_args_list[-1]) + assert is_success + + def test_calls_subp_return_false(self, m_subp, m_is_up): + devname = "eth0" + m_is_up.return_value = False + is_success = LinuxNetworking().try_set_link_up(devname) + + assert (mock.call(['ip', 'link', 'set', devname, 'up']) == + m_subp.call_args_list[-1]) + assert not is_success + + class TestBSDNetworkingSettle: def test_settle_doesnt_error(self): # This also implicitly 
tests that it doesn't use subp.subp diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index f777a007..04ff2131 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -12,8 +12,10 @@ import os import os.path import re from time import time +from time import sleep from xml.dom import minidom import xml.etree.ElementTree as ET +from enum import Enum from cloudinit import dmi from cloudinit import log as logging @@ -67,13 +69,27 @@ DEFAULT_FS = 'ext4' # DMI chassis-asset-tag is set static for all azure instances AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77' REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds" +REPROVISION_NIC_ATTACH_MARKER_FILE = "/var/lib/cloud/data/wait_for_nic_attach" +REPROVISION_NIC_DETACHED_MARKER_FILE = "/var/lib/cloud/data/nic_detached" REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready" AGENT_SEED_DIR = '/var/lib/waagent' + # In the event where the IMDS primary server is not # available, it takes 1s to fallback to the secondary one IMDS_TIMEOUT_IN_SECONDS = 2 IMDS_URL = "http://169.254.169.254/metadata/" +IMDS_VER = "2019-06-01" +IMDS_VER_PARAM = "api-version={}".format(IMDS_VER) + + +class metadata_type(Enum): + compute = "{}instance?{}".format(IMDS_URL, IMDS_VER_PARAM) + network = "{}instance/network?{}".format(IMDS_URL, + IMDS_VER_PARAM) + reprovisiondata = "{}reprovisiondata?{}".format(IMDS_URL, + IMDS_VER_PARAM) + PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0" @@ -434,20 +450,39 @@ class DataSourceAzure(sources.DataSource): # need to look in the datadir and consider that valid ddir = self.ds_cfg['data_dir'] + # The order in which the candidates are inserted matters here, because + # it determines the value of ret. More specifically, the first one in + # the candidate list determines the path to take in order to get the + # metadata we need. 
candidates = [self.seed_dir] if os.path.isfile(REPROVISION_MARKER_FILE): candidates.insert(0, "IMDS") + report_diagnostic_event("Reprovision marker file already present " + "before crawling Azure metadata: %s" % + REPROVISION_MARKER_FILE, + logger_func=LOG.debug) + elif os.path.isfile(REPROVISION_NIC_ATTACH_MARKER_FILE): + candidates.insert(0, "NIC_ATTACH_MARKER_PRESENT") + report_diagnostic_event("Reprovision nic attach marker file " + "already present before crawling Azure " + "metadata: %s" % + REPROVISION_NIC_ATTACH_MARKER_FILE, + logger_func=LOG.debug) candidates.extend(list_possible_azure_ds_devs()) if ddir: candidates.append(ddir) found = None reprovision = False + reprovision_after_nic_attach = False for cdev in candidates: try: if cdev == "IMDS": ret = None reprovision = True + elif cdev == "NIC_ATTACH_MARKER_PRESENT": + ret = None + reprovision_after_nic_attach = True elif cdev.startswith("/dev/"): if util.is_FreeBSD(): ret = util.mount_cb(cdev, load_azure_ds_dir, @@ -472,12 +507,19 @@ class DataSourceAzure(sources.DataSource): continue perform_reprovision = reprovision or self._should_reprovision(ret) - if perform_reprovision: + perform_reprovision_after_nic_attach = ( + reprovision_after_nic_attach or + self._should_reprovision_after_nic_attach(ret)) + + if perform_reprovision or perform_reprovision_after_nic_attach: if util.is_FreeBSD(): msg = "Free BSD is not supported for PPS VMs" report_diagnostic_event(msg, logger_func=LOG.error) raise sources.InvalidMetaDataException(msg) + if perform_reprovision_after_nic_attach: + self._wait_for_all_nics_ready() ret = self._reprovision() + imds_md = get_metadata_from_imds( self.fallback_interface, retries=10) (md, userdata_raw, cfg, files) = ret @@ -508,7 +550,7 @@ class DataSourceAzure(sources.DataSource): crawled_data['metadata']['random_seed'] = seed crawled_data['metadata']['instance-id'] = self._iid() - if perform_reprovision: + if perform_reprovision or perform_reprovision_after_nic_attach: 
LOG.info("Reporting ready to Azure after getting ReprovisionData") use_cached_ephemeral = ( self.distro.networking.is_up(self.fallback_interface) and @@ -617,14 +659,12 @@ class DataSourceAzure(sources.DataSource): LOG.debug('Retrieved SSH keys from IMDS') except KeyError: log_msg = 'Unable to get keys from IMDS, falling back to OVF' - LOG.debug(log_msg) report_diagnostic_event(log_msg, logger_func=LOG.debug) try: ssh_keys = self.metadata['public-keys'] LOG.debug('Retrieved keys from OVF') except KeyError: log_msg = 'No keys available from OVF' - LOG.debug(log_msg) report_diagnostic_event(log_msg, logger_func=LOG.debug) return ssh_keys @@ -660,10 +700,293 @@ class DataSourceAzure(sources.DataSource): LOG.debug("negotiating already done for %s", self.get_instance_id()) + @azure_ds_telemetry_reporter + def _wait_for_nic_detach(self, nl_sock): + """Use the netlink socket provided to wait for nic detach event. + NOTE: The function doesn't close the socket. The caller owns closing + the socket and disposing it safely. + """ + try: + ifname = None + + # Preprovisioned VM will only have one NIC, and it gets + # detached immediately after deployment. + with events.ReportEventStack( + name="wait-for-nic-detach", + description=("wait for nic detach"), + parent=azure_ds_reporter): + ifname = netlink.wait_for_nic_detach_event(nl_sock) + if ifname is None: + msg = ("Preprovisioned nic not detached as expected. 
" + "Proceeding without failing.") + report_diagnostic_event(msg, logger_func=LOG.warning) + else: + report_diagnostic_event("The preprovisioned nic %s is detached" + % ifname, logger_func=LOG.warning) + path = REPROVISION_NIC_DETACHED_MARKER_FILE + LOG.info("Creating a marker file for nic detached: %s", path) + util.write_file(path, "{pid}: {time}\n".format( + pid=os.getpid(), time=time())) + except AssertionError as error: + report_diagnostic_event(error, logger_func=LOG.error) + raise + + @azure_ds_telemetry_reporter + def wait_for_link_up(self, ifname): + """In cases where the link state is still showing down after a nic is + hot-attached, we can attempt to bring it up by forcing the hv_netvsc + drivers to query the link state by unbinding and then binding the + device. This function attempts infinitely until the link is up, + because we cannot proceed further until we have a stable link.""" + + if self.distro.networking.try_set_link_up(ifname): + report_diagnostic_event("The link %s is already up." % ifname, + logger_func=LOG.info) + return + + LOG.info("Attempting to bring %s up", ifname) + + attempts = 0 + while True: + + LOG.info("Unbinding and binding the interface %s", ifname) + devicename = net.read_sys_net(ifname, + 'device/device_id').strip('{}') + util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/unbind', + devicename) + util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/bind', + devicename) + + attempts = attempts + 1 + if self.distro.networking.try_set_link_up(ifname): + msg = "The link %s is up after %s attempts" % (ifname, + attempts) + report_diagnostic_event(msg, logger_func=LOG.info) + return + + sleep_duration = 1 + msg = ("Link is not up after %d attempts with %d seconds sleep " + "between attempts." 
% (attempts, sleep_duration)) + + if attempts % 10 == 0: + report_diagnostic_event(msg, logger_func=LOG.info) + else: + LOG.info(msg) + + sleep(sleep_duration) + + @azure_ds_telemetry_reporter + def _create_report_ready_marker(self): + path = REPORTED_READY_MARKER_FILE + LOG.info( + "Creating a marker file to report ready: %s", path) + util.write_file(path, "{pid}: {time}\n".format( + pid=os.getpid(), time=time())) + report_diagnostic_event( + 'Successfully created reported ready marker file ' + 'while in the preprovisioning pool.', + logger_func=LOG.debug) + + @azure_ds_telemetry_reporter + def _report_ready_if_needed(self): + """Report ready to the platform if the marker file is not present, + and create the marker file. + """ + have_not_reported_ready = ( + not os.path.isfile(REPORTED_READY_MARKER_FILE)) + + if have_not_reported_ready: + report_diagnostic_event("Reporting ready before nic detach", + logger_func=LOG.info) + try: + with EphemeralDHCPv4WithReporting(azure_ds_reporter) as lease: + self._report_ready(lease=lease) + except Exception as e: + report_diagnostic_event("Exception reporting ready during " + "preprovisioning before nic detach: %s" + % e, logger_func=LOG.error) + raise + self._create_report_ready_marker() + else: + report_diagnostic_event("Already reported ready before nic detach." + " The marker file already exists: %s" % + REPORTED_READY_MARKER_FILE, + logger_func=LOG.error) + + @azure_ds_telemetry_reporter + def _check_if_nic_is_primary(self, ifname): + """Check if a given interface is the primary nic or not. If it is the + primary nic, then we also get the expected total nic count from IMDS. + IMDS will process the request and send a response only for primary NIC. + """ + is_primary = False + expected_nic_count = -1 + imds_md = None + + # For now, only a VM's primary NIC can contact IMDS and WireServer. If + # DHCP fails for a NIC, we have no mechanism to determine if the NIC is + # primary or secondary. 
In this case, the desired behavior is to fail + # VM provisioning if there is any DHCP failure when trying to determine + # the primary NIC. + try: + with events.ReportEventStack( + name="obtain-dhcp-lease", + description=("obtain dhcp lease for %s when attempting to " + "determine primary NIC during reprovision of " + "a pre-provisioned VM" % ifname), + parent=azure_ds_reporter): + dhcp_ctx = EphemeralDHCPv4( + iface=ifname, + dhcp_log_func=dhcp_log_cb) + dhcp_ctx.obtain_lease() + except Exception as e: + report_diagnostic_event("Giving up. Failed to obtain dhcp lease " + "for %s when attempting to determine " + "primary NIC during reprovision due to %s" + % (ifname, e), logger_func=LOG.error) + raise + + # Primary nic detection will be optimized in the future. The fact that + # primary nic is being attached first helps here. Otherwise each nic + # could add several seconds of delay. + try: + imds_md = get_metadata_from_imds( + ifname, + 5, + metadata_type.network) + except Exception as e: + LOG.warning( + "Failed to get network metadata using nic %s. Attempt to " + "contact IMDS failed with error %s. Assuming this is not the " + "primary nic.", ifname, e) + finally: + # If we are not the primary nic, then clean the dhcp context. + if imds_md is None: + dhcp_ctx.clean_network() + + if imds_md is not None: + # Only primary NIC will get a response from IMDS. + LOG.info("%s is the primary nic", ifname) + is_primary = True + + # If primary, set ephemeral dhcp ctx so we can report ready + self._ephemeral_dhcp_ctx = dhcp_ctx + + # Set the expected nic count based on the response received. + expected_nic_count = len( + imds_md['interface']) + report_diagnostic_event("Expected nic count: %d" % + expected_nic_count, logger_func=LOG.info) + + return is_primary, expected_nic_count + + @azure_ds_telemetry_reporter + def _wait_for_hot_attached_nics(self, nl_sock): + """Wait until all the expected nics for the vm are hot-attached. 
+ The expected nic count is obtained by requesting the network metadata + from IMDS. + """ + LOG.info("Waiting for nics to be hot-attached") + try: + # Wait for nics to be attached one at a time, until we know for + # sure that all nics have been attached. + nics_found = [] + primary_nic_found = False + expected_nic_count = -1 + + # Wait for netlink nic attach events. After the first nic is + # attached, we are already in the customer vm deployment path and + # so everything from then on should happen fast and avoid + # unnecessary delays wherever possible. + while True: + ifname = None + with events.ReportEventStack( + name="wait-for-nic-attach", + description=("wait for nic attach after %d nics have " + "been attached" % len(nics_found)), + parent=azure_ds_reporter): + ifname = netlink.wait_for_nic_attach_event(nl_sock, + nics_found) + + # wait_for_nic_attach_event guarantees that ifname is not None + nics_found.append(ifname) + report_diagnostic_event("Detected nic %s attached." % ifname, + logger_func=LOG.info) + + # Attempt to bring the interface's operating state to + # UP in case it is not already. + self.wait_for_link_up(ifname) + + # If primary nic is not found, check if this is it. The + # platform will attach the primary nic first so we + # won't be in primary_nic_found = false state for long. + if not primary_nic_found: + LOG.info("Checking if %s is the primary nic", + ifname) + (primary_nic_found, expected_nic_count) = ( + self._check_if_nic_is_primary(ifname)) + + # Exit criteria: check if we've discovered all nics + if (expected_nic_count != -1 + and len(nics_found) >= expected_nic_count): + LOG.info("Found all the nics for this VM.") + break + + except AssertionError as error: + report_diagnostic_event(error, logger_func=LOG.error) + + @azure_ds_telemetry_reporter + def _wait_for_all_nics_ready(self): + """Wait for nic(s) to be hot-attached. There may be multiple nics + depending on the customer request. 
+ But only primary nic would be able to communicate with wireserver + and IMDS. So we detect and save the primary nic to be used later. + """ + + nl_sock = None + try: + nl_sock = netlink.create_bound_netlink_socket() + + report_ready_marker_present = bool( + os.path.isfile(REPORTED_READY_MARKER_FILE)) + + # Report ready if the marker file is not already present. + # The nic of the preprovisioned vm gets hot-detached as soon as + # we report ready. So no need to save the dhcp context. + self._report_ready_if_needed() + + has_nic_been_detached = bool( + os.path.isfile(REPROVISION_NIC_DETACHED_MARKER_FILE)) + + if not has_nic_been_detached: + LOG.info("NIC has not been detached yet.") + self._wait_for_nic_detach(nl_sock) + + # If we know that the preprovisioned nic has been detached, and we + # still have a fallback nic, then it means the VM must have + # rebooted as part of customer assignment, and all the nics have + # already been attached by the Azure platform. So there is no need + # to wait for nics to be hot-attached. + if not self.fallback_interface: + self._wait_for_hot_attached_nics(nl_sock) + else: + report_diagnostic_event("Skipping waiting for nic attach " + "because we already have a fallback " + "interface. Report Ready marker " + "present before detaching nics: %s" % + report_ready_marker_present, + logger_func=LOG.info) + except netlink.NetlinkCreateSocketError as e: + report_diagnostic_event(e, logger_func=LOG.warning) + raise + finally: + if nl_sock: + nl_sock.close() + def _poll_imds(self): """Poll IMDS for the new provisioning data until we get a valid response. 
Then return the returned JSON object.""" - url = IMDS_URL + "reprovisiondata?api-version=2017-04-02" + url = metadata_type.reprovisiondata.value headers = {"Metadata": "true"} nl_sock = None report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE)) @@ -705,17 +1028,31 @@ class DataSourceAzure(sources.DataSource): logger_func=LOG.warning) return False - LOG.debug("Wait for vnetswitch to happen") + # When the interface is hot-attached, we would have already + # done dhcp and set the dhcp context. In that case, skip + # the attempt to do dhcp. + is_ephemeral_ctx_present = self._ephemeral_dhcp_ctx is not None + msg = ("Unexpected error. Dhcp context is not expected to be already " + "set when we need to wait for vnet switch") + if is_ephemeral_ctx_present and report_ready: + report_diagnostic_event(msg, logger_func=LOG.error) + raise RuntimeError(msg) + while True: try: - # Save our EphemeralDHCPv4 context to avoid repeated dhcp - with events.ReportEventStack( - name="obtain-dhcp-lease", - description="obtain dhcp lease", - parent=azure_ds_reporter): - self._ephemeral_dhcp_ctx = EphemeralDHCPv4( - dhcp_log_func=dhcp_log_cb) - lease = self._ephemeral_dhcp_ctx.obtain_lease() + # Since is_ephemeral_ctx_present is set only once, this ensures + # that with regular reprovisioning, dhcp is always done every + # time the loop runs. 
+ if not is_ephemeral_ctx_present: + # Save our EphemeralDHCPv4 context to avoid repeated dhcp + # later when we report ready + with events.ReportEventStack( + name="obtain-dhcp-lease", + description="obtain dhcp lease", + parent=azure_ds_reporter): + self._ephemeral_dhcp_ctx = EphemeralDHCPv4( + dhcp_log_func=dhcp_log_cb) + lease = self._ephemeral_dhcp_ctx.obtain_lease() if vnet_switched: dhcp_attempts += 1 @@ -737,17 +1074,10 @@ class DataSourceAzure(sources.DataSource): self._ephemeral_dhcp_ctx.clean_network() raise sources.InvalidMetaDataException(msg) - path = REPORTED_READY_MARKER_FILE - LOG.info( - "Creating a marker file to report ready: %s", path) - util.write_file(path, "{pid}: {time}\n".format( - pid=os.getpid(), time=time())) - report_diagnostic_event( - 'Successfully created reported ready marker file ' - 'while in the preprovisioning pool.', - logger_func=LOG.debug) + self._create_report_ready_marker() report_ready = False + LOG.debug("Wait for vnetswitch to happen") with events.ReportEventStack( name="wait-for-media-disconnect-connect", description="wait for vnet switch", @@ -863,6 +1193,35 @@ class DataSourceAzure(sources.DataSource): "connectivity issues: %s" % e, logger_func=LOG.warning) return False + def _should_reprovision_after_nic_attach(self, candidate_metadata) -> bool: + """Whether or not we should wait for nic attach and then poll + IMDS for reprovisioning data. Also sets a marker file to poll IMDS. + + The marker file is used for the following scenario: the VM boots into + wait for nic attach, which we expect to be proceeding infinitely until + the nic is attached. If for whatever reason the platform moves us to a + new host (for instance a hardware issue), we need to keep waiting. + However, since the VM reports ready to the Fabric, we will not attach + the ISO, thus cloud-init needs to have a way of knowing that it should + jump back into the waiting mode in order to retrieve the ovf_env. 
+ + @param candidate_metadata: Metadata obtained from reading ovf-env. + @return: Whether to reprovision after waiting for nics to be attached. + """ + if not candidate_metadata: + return False + (_md, _userdata_raw, cfg, _files) = candidate_metadata + path = REPROVISION_NIC_ATTACH_MARKER_FILE + if (cfg.get('PreprovisionedVMType', None) == "Savable" or + os.path.isfile(path)): + if not os.path.isfile(path): + LOG.info("Creating a marker file to wait for nic attach: %s", + path) + util.write_file(path, "{pid}: {time}\n".format( + pid=os.getpid(), time=time())) + return True + return False + def _should_reprovision(self, ret): """Whether or not we should poll IMDS for reprovisioning data. Also sets a marker file to poll IMDS. @@ -879,6 +1238,7 @@ class DataSourceAzure(sources.DataSource): (_md, _userdata_raw, cfg, _files) = ret path = REPROVISION_MARKER_FILE if (cfg.get('PreprovisionedVm') is True or + cfg.get('PreprovisionedVMType', None) == 'Running' or os.path.isfile(path)): if not os.path.isfile(path): LOG.info("Creating a marker file to poll imds: %s", @@ -944,6 +1304,8 @@ class DataSourceAzure(sources.DataSource): util.del_file(REPORTED_READY_MARKER_FILE) util.del_file(REPROVISION_MARKER_FILE) + util.del_file(REPROVISION_NIC_ATTACH_MARKER_FILE) + util.del_file(REPROVISION_NIC_DETACHED_MARKER_FILE) return fabric_data @azure_ds_telemetry_reporter @@ -1399,34 +1761,109 @@ def read_azure_ovf(contents): if 'ssh_pwauth' not in cfg and password: cfg['ssh_pwauth'] = True - cfg['PreprovisionedVm'] = _extract_preprovisioned_vm_setting(dom) + preprovisioning_cfg = _get_preprovisioning_cfgs(dom) + cfg = util.mergemanydict([cfg, preprovisioning_cfg]) return (md, ud, cfg) @azure_ds_telemetry_reporter -def _extract_preprovisioned_vm_setting(dom): - """Read the preprovision flag from the ovf. It should not - exist unless true.""" +def _get_preprovisioning_cfgs(dom): + """Read the preprovisioning related flags from ovf and populates a dict + with the info. 
+ + Two flags are in use today: PreprovisionedVm bool and + PreprovisionedVMType enum. In the long term, the PreprovisionedVm bool + will be deprecated in favor of PreprovisionedVMType string/enum. + + Only these combinations of values are possible today: + - PreprovisionedVm=True and PreprovisionedVMType=Running + - PreprovisionedVm=False and PreprovisionedVMType=Savable + - PreprovisionedVm is missing and PreprovisionedVMType=Running/Savable + - PreprovisionedVm=False and PreprovisionedVMType is missing + + More specifically, this will never happen: + - PreprovisionedVm=True and PreprovisionedVMType=Savable + """ + cfg = { + "PreprovisionedVm": False, + "PreprovisionedVMType": None + } + platform_settings_section = find_child( dom.documentElement, lambda n: n.localName == "PlatformSettingsSection") if not platform_settings_section or len(platform_settings_section) == 0: LOG.debug("PlatformSettingsSection not found") - return False + return cfg platform_settings = find_child( platform_settings_section[0], lambda n: n.localName == "PlatformSettings") if not platform_settings or len(platform_settings) == 0: LOG.debug("PlatformSettings not found") - return False - preprovisionedVm = find_child( + return cfg + + # Read the PreprovisionedVm bool flag. This should be deprecated when the + # platform has removed PreprovisionedVm and only surfaces + # PreprovisionedVMType. + cfg["PreprovisionedVm"] = _get_preprovisionedvm_cfg_value( + platform_settings) + + cfg["PreprovisionedVMType"] = _get_preprovisionedvmtype_cfg_value( + platform_settings) + return cfg + + +@azure_ds_telemetry_reporter +def _get_preprovisionedvm_cfg_value(platform_settings): + preprovisionedVm = False + + # Read the PreprovisionedVm bool flag. This should be deprecated when the + # platform has removed PreprovisionedVm and only surfaces + # PreprovisionedVMType. 
+ preprovisionedVmVal = find_child( platform_settings[0], lambda n: n.localName == "PreprovisionedVm") - if not preprovisionedVm or len(preprovisionedVm) == 0: + if not preprovisionedVmVal or len(preprovisionedVmVal) == 0: LOG.debug("PreprovisionedVm not found") - return False - return util.translate_bool(preprovisionedVm[0].firstChild.nodeValue) + return preprovisionedVm + preprovisionedVm = util.translate_bool( + preprovisionedVmVal[0].firstChild.nodeValue) + + report_diagnostic_event( + "PreprovisionedVm: %s" % preprovisionedVm, logger_func=LOG.info) + + return preprovisionedVm + + +@azure_ds_telemetry_reporter +def _get_preprovisionedvmtype_cfg_value(platform_settings): + preprovisionedVMType = None + + # Read the PreprovisionedVMType value from the ovf. It can be + # 'Running' or 'Savable' or not exist. This enum value is intended to + # replace PreprovisionedVm bool flag in the long term. + # A Running VM is the same as preprovisioned VMs of today. This is + # equivalent to having PreprovisionedVm=True. + # A Savable VM is one whose nic is hot-detached immediately after it + # reports ready the first time to free up the network resources. + # Once assigned to customer, the customer-requested nics are + # hot-attached to it and reprovision happens like today. 
+ preprovisionedVMTypeVal = find_child( + platform_settings[0], + lambda n: n.localName == "PreprovisionedVMType") + if (not preprovisionedVMTypeVal or len(preprovisionedVMTypeVal) == 0 or + preprovisionedVMTypeVal[0].firstChild is None): + LOG.debug("PreprovisionedVMType not found") + return preprovisionedVMType + + preprovisionedVMType = preprovisionedVMTypeVal[0].firstChild.nodeValue + + report_diagnostic_event( + "PreprovisionedVMType: %s" % preprovisionedVMType, + logger_func=LOG.info) + + return preprovisionedVMType def encrypt_pass(password, salt_id="$6$"): @@ -1588,8 +2025,10 @@ def _generate_network_config_from_fallback_config() -> dict: @azure_ds_telemetry_reporter -def get_metadata_from_imds(fallback_nic, retries): - """Query Azure's network metadata service, returning a dictionary. +def get_metadata_from_imds(fallback_nic, + retries, + md_type=metadata_type.compute): + """Query Azure's instance metadata service, returning a dictionary. If network is not up, setup ephemeral dhcp on fallback_nic to talk to the IMDS. 
For more info on IMDS: @@ -1604,7 +2043,7 @@ def get_metadata_from_imds(fallback_nic, retries): """ kwargs = {'logfunc': LOG.debug, 'msg': 'Crawl of Azure Instance Metadata Service (IMDS)', - 'func': _get_metadata_from_imds, 'args': (retries,)} + 'func': _get_metadata_from_imds, 'args': (retries, md_type,)} if net.is_up(fallback_nic): return util.log_time(**kwargs) else: @@ -1620,9 +2059,9 @@ def get_metadata_from_imds(fallback_nic, retries): @azure_ds_telemetry_reporter -def _get_metadata_from_imds(retries): +def _get_metadata_from_imds(retries, md_type=metadata_type.compute): - url = IMDS_URL + "instance?api-version=2019-06-01" + url = md_type.value headers = {"Metadata": "true"} try: response = readurl( diff --git a/cloudinit/sources/helpers/netlink.py b/cloudinit/sources/helpers/netlink.py index c2ad587b..e13d6834 100644 --- a/cloudinit/sources/helpers/netlink.py +++ b/cloudinit/sources/helpers/netlink.py @@ -185,6 +185,54 @@ def read_rta_oper_state(data): return InterfaceOperstate(ifname, operstate) +def wait_for_nic_attach_event(netlink_socket, existing_nics): + '''Block until a single nic is attached. + + :param: netlink_socket: netlink_socket to receive events + :param: existing_nics: List of existing nics so that we can skip them. + :raises: AssertionError if netlink_socket is none. + ''' + LOG.debug("Preparing to wait for nic attach.") + ifname = None + + def should_continue_cb(iname, carrier, prevCarrier): + if iname in existing_nics: + return True + nonlocal ifname + ifname = iname + return False + + # We can return even if the operational state of the new nic is DOWN + # because we set it to UP before doing dhcp. + read_netlink_messages(netlink_socket, + None, + [RTM_NEWLINK], + [OPER_UP, OPER_DOWN], + should_continue_cb) + return ifname + + +def wait_for_nic_detach_event(netlink_socket): + '''Block until a single nic is detached and its operational state is down. + + :param: netlink_socket: netlink_socket to receive events. 
+ ''' + LOG.debug("Preparing to wait for nic detach.") + ifname = None + + def should_continue_cb(iname, carrier, prevCarrier): + nonlocal ifname + ifname = iname + return False + + read_netlink_messages(netlink_socket, + None, + [RTM_DELLINK], + [OPER_DOWN], + should_continue_cb) + return ifname + + def wait_for_media_disconnect_connect(netlink_socket, ifname): '''Block until media disconnect and connect has happened on an interface. Listens on netlink socket to receive netlink events and when the carrier @@ -198,10 +246,42 @@ def wait_for_media_disconnect_connect(netlink_socket, ifname): assert (netlink_socket is not None), ("netlink socket is none") assert (ifname is not None), ("interface name is none") assert (len(ifname) > 0), ("interface name cannot be empty") + + def should_continue_cb(iname, carrier, prevCarrier): + # check for carrier down, up sequence + isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP) + if isVnetSwitch: + LOG.debug("Media switch happened on %s.", ifname) + return False + return True + + LOG.debug("Wait for media disconnect and reconnect to happen") + read_netlink_messages(netlink_socket, + ifname, + [RTM_NEWLINK, RTM_DELLINK], + [OPER_UP, OPER_DOWN], + should_continue_cb) + + +def read_netlink_messages(netlink_socket, + ifname_filter, + rtm_types, + operstates, + should_continue_callback): + ''' Reads from the netlink socket until the condition specified by + the continuation callback is met. + + :param: netlink_socket: netlink_socket to receive events. + :param: ifname_filter: if not None, will only listen for this interface. + :param: rtm_types: Type of netlink events to listen for. + :param: operstates: Operational states to listen. + :param: should_continue_callback: Specifies when to stop listening. 
+ ''' + if netlink_socket is None: + raise RuntimeError("Netlink socket is none") + data = bytes() carrier = OPER_UP prevCarrier = OPER_UP - data = bytes() - LOG.debug("Wait for media disconnect and reconnect to happen") while True: recv_data = read_netlink_socket(netlink_socket, SELECT_TIMEOUT) if recv_data is None: @@ -223,26 +303,26 @@ def wait_for_media_disconnect_connect(netlink_socket, ifname): padlen = (nlheader.length+PAD_ALIGNMENT-1) & ~(PAD_ALIGNMENT-1) offset = offset + padlen LOG.debug('offset to next netlink message: %d', offset) - # Ignore any messages not new link or del link - if nlheader.type not in [RTM_NEWLINK, RTM_DELLINK]: + # Continue if we are not interested in this message. + if nlheader.type not in rtm_types: continue interface_state = read_rta_oper_state(nl_msg) if interface_state is None: LOG.debug('Failed to read rta attributes: %s', interface_state) continue - if interface_state.ifname != ifname: + if (ifname_filter is not None and + interface_state.ifname != ifname_filter): LOG.debug( "Ignored netlink event on interface %s. 
Waiting for %s.", - interface_state.ifname, ifname) + interface_state.ifname, ifname_filter) continue - if interface_state.operstate not in [OPER_UP, OPER_DOWN]: + if interface_state.operstate not in operstates: continue prevCarrier = carrier carrier = interface_state.operstate - # check for carrier down, up sequence - isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP) - if isVnetSwitch: - LOG.debug("Media switch happened on %s.", ifname) + if not should_continue_callback(interface_state.ifname, + carrier, + prevCarrier): return data = data[offset:] diff --git a/cloudinit/sources/helpers/tests/test_netlink.py b/cloudinit/sources/helpers/tests/test_netlink.py index 10760bd6..cafe3961 100644 --- a/cloudinit/sources/helpers/tests/test_netlink.py +++ b/cloudinit/sources/helpers/tests/test_netlink.py @@ -9,9 +9,10 @@ import codecs from cloudinit.sources.helpers.netlink import ( NetlinkCreateSocketError, create_bound_netlink_socket, read_netlink_socket, read_rta_oper_state, unpack_rta_attr, wait_for_media_disconnect_connect, + wait_for_nic_attach_event, wait_for_nic_detach_event, OPER_DOWN, OPER_UP, OPER_DORMANT, OPER_LOWERLAYERDOWN, OPER_NOTPRESENT, - OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_SETLINK, - RTM_GETLINK, MAX_SIZE) + OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_DELLINK, + RTM_SETLINK, RTM_GETLINK, MAX_SIZE) def int_to_bytes(i): @@ -133,6 +134,75 @@ class TestParseNetlinkMessage(CiTestCase): str(context.exception)) +@mock.patch('cloudinit.sources.helpers.netlink.socket.socket') +@mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket') +class TestNicAttachDetach(CiTestCase): + with_logs = True + + def _media_switch_data(self, ifname, msg_type, operstate): + '''construct netlink data with specified fields''' + if ifname and operstate is not None: + data = bytearray(48) + bytes = ifname.encode("utf-8") + struct.pack_into("HH4sHHc", data, RTATTR_START_OFFSET, 8, 3, + bytes, 5, 16, 
int_to_bytes(operstate)) + elif ifname: + data = bytearray(40) + bytes = ifname.encode("utf-8") + struct.pack_into("HH4s", data, RTATTR_START_OFFSET, 8, 3, bytes) + elif operstate: + data = bytearray(40) + struct.pack_into("HHc", data, RTATTR_START_OFFSET, 5, 16, + int_to_bytes(operstate)) + struct.pack_into("=LHHLL", data, 0, len(data), msg_type, 0, 0, 0) + return data + + def test_nic_attached_oper_down(self, m_read_netlink_socket, m_socket): + '''Test for a new nic attached''' + ifname = "eth0" + data_op_down = self._media_switch_data(ifname, RTM_NEWLINK, OPER_DOWN) + m_read_netlink_socket.side_effect = [data_op_down] + ifread = wait_for_nic_attach_event(m_socket, []) + self.assertEqual(m_read_netlink_socket.call_count, 1) + self.assertEqual(ifname, ifread) + + def test_nic_attached_oper_up(self, m_read_netlink_socket, m_socket): + '''Test for a new nic attached''' + ifname = "eth0" + data_op_up = self._media_switch_data(ifname, RTM_NEWLINK, OPER_UP) + m_read_netlink_socket.side_effect = [data_op_up] + ifread = wait_for_nic_attach_event(m_socket, []) + self.assertEqual(m_read_netlink_socket.call_count, 1) + self.assertEqual(ifname, ifread) + + def test_nic_attach_ignore_existing(self, m_read_netlink_socket, m_socket): + '''Test that we read only the interfaces we are interested in.''' + data_eth0 = self._media_switch_data("eth0", RTM_NEWLINK, OPER_DOWN) + data_eth1 = self._media_switch_data("eth1", RTM_NEWLINK, OPER_DOWN) + m_read_netlink_socket.side_effect = [data_eth0, data_eth1] + ifread = wait_for_nic_attach_event(m_socket, ["eth0"]) + self.assertEqual(m_read_netlink_socket.call_count, 2) + self.assertEqual("eth1", ifread) + + def test_nic_attach_read_first(self, m_read_netlink_socket, m_socket): + '''Test that we read only the interfaces we are interested in.''' + data_eth0 = self._media_switch_data("eth0", RTM_NEWLINK, OPER_DOWN) + data_eth1 = self._media_switch_data("eth1", RTM_NEWLINK, OPER_DOWN) + m_read_netlink_socket.side_effect = [data_eth0, 
data_eth1] + ifread = wait_for_nic_attach_event(m_socket, ["eth1"]) + self.assertEqual(m_read_netlink_socket.call_count, 1) + self.assertEqual("eth0", ifread) + + def test_nic_detached(self, m_read_netlink_socket, m_socket): + '''Test for an existing nic detached''' + ifname = "eth0" + data_op_down = self._media_switch_data(ifname, RTM_DELLINK, OPER_DOWN) + m_read_netlink_socket.side_effect = [data_op_down] + ifread = wait_for_nic_detach_event(m_socket) + self.assertEqual(m_read_netlink_socket.call_count, 1) + self.assertEqual(ifname, ifread) + + @mock.patch('cloudinit.sources.helpers.netlink.socket.socket') @mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket') class TestWaitForMediaDisconnectConnect(CiTestCase): diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 534314aa..e363c1f9 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -11,6 +11,7 @@ from cloudinit.version import version_string as vs from cloudinit.tests.helpers import ( HttprettyTestCase, CiTestCase, populate_dir, mock, wrap_and_call, ExitStack, resourceLocation) +from cloudinit.sources.helpers import netlink import copy import crypt @@ -78,6 +79,8 @@ def construct_valid_ovf_env(data=None, pubkeys=None, if platform_settings: for k, v in platform_settings.items(): content += "<%s>%s\n" % (k, v, k) + if "PreprovisionedVMType" not in platform_settings: + content += """""" content += """ """ @@ -156,6 +159,31 @@ SECONDARY_INTERFACE = { } } +IMDS_NETWORK_METADATA = { + "interface": [ + { + "macAddress": "000D3A047598", + "ipv6": { + "ipAddress": [] + }, + "ipv4": { + "subnet": [ + { + "prefix": "24", + "address": "10.0.0.0" + } + ], + "ipAddress": [ + { + "privateIpAddress": "10.0.0.4", + "publicIpAddress": "104.46.124.81" + } + ] + } + } + ] +} + MOCKPATH = 'cloudinit.sources.DataSourceAzure.' 
@@ -366,8 +394,8 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): self.network_md_url = dsaz.IMDS_URL + "instance?api-version=2019-06-01" @mock.patch(MOCKPATH + 'readurl') - @mock.patch(MOCKPATH + 'EphemeralDHCPv4') - @mock.patch(MOCKPATH + 'net.is_up') + @mock.patch(MOCKPATH + 'EphemeralDHCPv4', autospec=True) + @mock.patch(MOCKPATH + 'net.is_up', autospec=True) def test_get_metadata_does_not_dhcp_if_network_is_up( self, m_net_is_up, m_dhcp, m_readurl): """Do not perform DHCP setup when nic is already up.""" @@ -384,9 +412,66 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time self.logs.getvalue()) - @mock.patch(MOCKPATH + 'readurl') - @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting') + @mock.patch(MOCKPATH + 'readurl', autospec=True) + @mock.patch(MOCKPATH + 'EphemeralDHCPv4') + @mock.patch(MOCKPATH + 'net.is_up') + def test_get_compute_metadata_uses_compute_url( + self, m_net_is_up, m_dhcp, m_readurl): + """Make sure readurl is called with the correct url when accessing + network metadata""" + m_net_is_up.return_value = True + m_readurl.return_value = url_helper.StringResponse( + json.dumps(IMDS_NETWORK_METADATA).encode('utf-8')) + + dsaz.get_metadata_from_imds( + 'eth0', retries=3, md_type=dsaz.metadata_type.compute) + m_readurl.assert_called_with( + "http://169.254.169.254/metadata/instance?api-version=" + "2019-06-01", exception_cb=mock.ANY, + headers=mock.ANY, retries=mock.ANY, + timeout=mock.ANY) + + @mock.patch(MOCKPATH + 'readurl', autospec=True) + @mock.patch(MOCKPATH + 'EphemeralDHCPv4') + @mock.patch(MOCKPATH + 'net.is_up') + def test_get_network_metadata_uses_network_url( + self, m_net_is_up, m_dhcp, m_readurl): + """Make sure readurl is called with the correct url when accessing + network metadata""" + m_net_is_up.return_value = True + m_readurl.return_value = url_helper.StringResponse( + json.dumps(IMDS_NETWORK_METADATA).encode('utf-8')) + + dsaz.get_metadata_from_imds( + 
'eth0', retries=3, md_type=dsaz.metadata_type.network) + m_readurl.assert_called_with( + "http://169.254.169.254/metadata/instance/network?api-version=" + "2019-06-01", exception_cb=mock.ANY, + headers=mock.ANY, retries=mock.ANY, + timeout=mock.ANY) + + @mock.patch(MOCKPATH + 'readurl', autospec=True) + @mock.patch(MOCKPATH + 'EphemeralDHCPv4') @mock.patch(MOCKPATH + 'net.is_up') + def test_get_default_metadata_uses_compute_url( + self, m_net_is_up, m_dhcp, m_readurl): + """Make sure readurl is called with the correct url when accessing + network metadata""" + m_net_is_up.return_value = True + m_readurl.return_value = url_helper.StringResponse( + json.dumps(IMDS_NETWORK_METADATA).encode('utf-8')) + + dsaz.get_metadata_from_imds( + 'eth0', retries=3) + m_readurl.assert_called_with( + "http://169.254.169.254/metadata/instance?api-version=" + "2019-06-01", exception_cb=mock.ANY, + headers=mock.ANY, retries=mock.ANY, + timeout=mock.ANY) + + @mock.patch(MOCKPATH + 'readurl', autospec=True) + @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting', autospec=True) + @mock.patch(MOCKPATH + 'net.is_up', autospec=True) def test_get_metadata_performs_dhcp_when_network_is_down( self, m_net_is_up, m_dhcp, m_readurl): """Perform DHCP setup when nic is not up.""" @@ -410,7 +495,7 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS) @mock.patch('cloudinit.url_helper.time.sleep') - @mock.patch(MOCKPATH + 'net.is_up') + @mock.patch(MOCKPATH + 'net.is_up', autospec=True) def test_get_metadata_from_imds_empty_when_no_imds_present( self, m_net_is_up, m_sleep): """Return empty dict when IMDS network metadata is absent.""" @@ -431,7 +516,7 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): @mock.patch('requests.Session.request') @mock.patch('cloudinit.url_helper.time.sleep') - @mock.patch(MOCKPATH + 'net.is_up') + @mock.patch(MOCKPATH + 'net.is_up', autospec=True) def test_get_metadata_from_imds_retries_on_timeout( self, m_net_is_up, m_sleep, 
m_request): """Retry IMDS network metadata on timeout errors.""" @@ -801,6 +886,7 @@ scbus-1 on xpt0 bus 0 'sys_cfg': {}} dsrc = self._get_ds(data) expected_cfg = { + 'PreprovisionedVMType': None, 'PreprovisionedVm': False, 'datasource': {'Azure': {'agent_command': 'my_command'}}, 'system_info': {'default_user': {'name': u'myuser'}}} @@ -864,6 +950,66 @@ scbus-1 on xpt0 bus 0 dsrc.crawl_metadata() self.assertEqual(1, m_report_ready.call_count) + @mock.patch( + 'cloudinit.sources.DataSourceAzure.EphemeralDHCPv4WithReporting') + @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file') + @mock.patch( + 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready') + @mock.patch('cloudinit.sources.DataSourceAzure.DataSourceAzure._poll_imds') + @mock.patch( + 'cloudinit.sources.DataSourceAzure.DataSourceAzure.' + '_wait_for_all_nics_ready') + def test_crawl_metadata_waits_for_nic_on_savable_vms( + self, detect_nics, poll_imds_func, report_ready_func, m_write, m_dhcp + ): + """If reprovisioning, report ready at the end""" + ovfenv = construct_valid_ovf_env( + platform_settings={"PreprovisionedVMType": "Savable", + "PreprovisionedVm": "True"} + ) + + data = { + 'ovfcontent': ovfenv, + 'sys_cfg': {} + } + dsrc = self._get_ds(data) + poll_imds_func.return_value = ovfenv + dsrc.crawl_metadata() + self.assertEqual(1, report_ready_func.call_count) + self.assertEqual(1, detect_nics.call_count) + + @mock.patch( + 'cloudinit.sources.DataSourceAzure.EphemeralDHCPv4WithReporting') + @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file') + @mock.patch( + 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready') + @mock.patch('cloudinit.sources.DataSourceAzure.DataSourceAzure._poll_imds') + @mock.patch( + 'cloudinit.sources.DataSourceAzure.DataSourceAzure.' 
+ '_wait_for_all_nics_ready') + @mock.patch('os.path.isfile') + def test_detect_nics_when_marker_present( + self, is_file, detect_nics, poll_imds_func, report_ready_func, m_write, + m_dhcp): + """If reprovisioning, wait for nic attach if marker present""" + + def is_file_ret(key): + return key == dsaz.REPROVISION_NIC_ATTACH_MARKER_FILE + + is_file.side_effect = is_file_ret + ovfenv = construct_valid_ovf_env() + + data = { + 'ovfcontent': ovfenv, + 'sys_cfg': {} + } + + dsrc = self._get_ds(data) + poll_imds_func.return_value = ovfenv + dsrc.crawl_metadata() + self.assertEqual(1, report_ready_func.call_count) + self.assertEqual(1, detect_nics.call_count) + @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file') @mock.patch('cloudinit.sources.helpers.netlink.' 'wait_for_media_disconnect_connect') @@ -1526,7 +1672,7 @@ scbus-1 on xpt0 bus 0 @mock.patch('cloudinit.net.get_interface_mac') @mock.patch('cloudinit.net.get_devicelist') @mock.patch('cloudinit.net.device_driver') - @mock.patch('cloudinit.net.generate_fallback_config') + @mock.patch('cloudinit.net.generate_fallback_config', autospec=True) def test_fallback_network_config(self, mock_fallback, mock_dd, mock_devlist, mock_get_mac): """On absent IMDS network data, generate network fallback config.""" @@ -1561,7 +1707,7 @@ scbus-1 on xpt0 bus 0 blacklist_drivers=['mlx4_core', 'mlx5_core'], config_driver=True) - @mock.patch(MOCKPATH + 'net.get_interfaces') + @mock.patch(MOCKPATH + 'net.get_interfaces', autospec=True) @mock.patch(MOCKPATH + 'util.is_FreeBSD') def test_blacklist_through_distro( self, m_is_freebsd, m_net_get_interfaces): @@ -1583,17 +1729,17 @@ scbus-1 on xpt0 bus 0 m_net_get_interfaces.assert_called_with( blacklist_drivers=dsaz.BLACKLIST_DRIVERS) - @mock.patch(MOCKPATH + 'subp.subp') + @mock.patch(MOCKPATH + 'subp.subp', autospec=True) def test_get_hostname_with_no_args(self, m_subp): dsaz.get_hostname() m_subp.assert_called_once_with(("hostname",), capture=True) - @mock.patch(MOCKPATH + 
'subp.subp') + @mock.patch(MOCKPATH + 'subp.subp', autospec=True) def test_get_hostname_with_string_arg(self, m_subp): dsaz.get_hostname(hostname_command="hostname") m_subp.assert_called_once_with(("hostname",), capture=True) - @mock.patch(MOCKPATH + 'subp.subp') + @mock.patch(MOCKPATH + 'subp.subp', autospec=True) def test_get_hostname_with_iterable_arg(self, m_subp): dsaz.get_hostname(hostname_command=("hostname",)) m_subp.assert_called_once_with(("hostname",), capture=True) @@ -2224,6 +2370,29 @@ class TestPreprovisioningReadAzureOvfFlag(CiTestCase): ret = dsaz.read_azure_ovf(content) cfg = ret[2] self.assertFalse(cfg['PreprovisionedVm']) + self.assertEqual(None, cfg["PreprovisionedVMType"]) + + def test_read_azure_ovf_with_running_type(self): + """The read_azure_ovf method should set PreprovisionedVMType + cfg flag to Running.""" + content = construct_valid_ovf_env( + platform_settings={"PreprovisionedVMType": "Running", + "PreprovisionedVm": "True"}) + ret = dsaz.read_azure_ovf(content) + cfg = ret[2] + self.assertTrue(cfg['PreprovisionedVm']) + self.assertEqual("Running", cfg['PreprovisionedVMType']) + + def test_read_azure_ovf_with_savable_type(self): + """The read_azure_ovf method should set PreprovisionedVMType + cfg flag to Savable.""" + content = construct_valid_ovf_env( + platform_settings={"PreprovisionedVMType": "Savable", + "PreprovisionedVm": "True"}) + ret = dsaz.read_azure_ovf(content) + cfg = ret[2] + self.assertTrue(cfg['PreprovisionedVm']) + self.assertEqual("Savable", cfg['PreprovisionedVMType']) @mock.patch('os.path.isfile') @@ -2273,6 +2442,227 @@ class TestPreprovisioningShouldReprovision(CiTestCase): _poll_imds.assert_called_with() +class TestPreprovisioningHotAttachNics(CiTestCase): + + def setUp(self): + super(TestPreprovisioningHotAttachNics, self).setUp() + self.tmp = self.tmp_dir() + self.waagent_d = self.tmp_path('/var/lib/waagent', self.tmp) + self.paths = helpers.Paths({'cloud_dir': self.tmp}) + dsaz.BUILTIN_DS_CONFIG['data_dir'] = 
self.waagent_d + self.paths = helpers.Paths({'cloud_dir': self.tmp}) + + @mock.patch('cloudinit.sources.helpers.netlink.wait_for_nic_detach_event', + autospec=True) + @mock.patch(MOCKPATH + 'util.write_file', autospec=True) + def test_nic_detach_writes_marker(self, m_writefile, m_detach): + """When we detect that a nic gets detached, we write a marker for it""" + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + nl_sock = mock.MagicMock() + dsa._wait_for_nic_detach(nl_sock) + m_detach.assert_called_with(nl_sock) + self.assertEqual(1, m_detach.call_count) + m_writefile.assert_called_with( + dsaz.REPROVISION_NIC_DETACHED_MARKER_FILE, mock.ANY) + + @mock.patch(MOCKPATH + 'util.write_file', autospec=True) + @mock.patch(MOCKPATH + 'DataSourceAzure.fallback_interface') + @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting') + @mock.patch(MOCKPATH + 'DataSourceAzure._report_ready') + @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach') + def test_detect_nic_attach_reports_ready_and_waits_for_detach( + self, m_detach, m_report_ready, m_dhcp, m_fallback_if, + m_writefile): + """Report ready first and then wait for nic detach""" + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa._wait_for_all_nics_ready() + m_fallback_if.return_value = "Dummy interface" + self.assertEqual(1, m_report_ready.call_count) + self.assertEqual(1, m_detach.call_count) + self.assertEqual(1, m_writefile.call_count) + self.assertEqual(1, m_dhcp.call_count) + m_writefile.assert_called_with(dsaz.REPORTED_READY_MARKER_FILE, + mock.ANY) + + @mock.patch('os.path.isfile') + @mock.patch(MOCKPATH + 'DataSourceAzure.fallback_interface') + @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting') + @mock.patch(MOCKPATH + 'DataSourceAzure._report_ready') + @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach') + def test_detect_nic_attach_skips_report_ready_when_marker_present( + self, m_detach, m_report_ready, m_dhcp, m_fallback_if, m_isfile): + """Skip reporting 
ready if we already have a marker file.""" + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + + def isfile(key): + return key == dsaz.REPORTED_READY_MARKER_FILE + + m_isfile.side_effect = isfile + dsa._wait_for_all_nics_ready() + m_fallback_if.return_value = "Dummy interface" + self.assertEqual(0, m_report_ready.call_count) + self.assertEqual(0, m_dhcp.call_count) + self.assertEqual(1, m_detach.call_count) + + @mock.patch('os.path.isfile') + @mock.patch(MOCKPATH + 'DataSourceAzure.fallback_interface') + @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting') + @mock.patch(MOCKPATH + 'DataSourceAzure._report_ready') + @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach') + def test_detect_nic_attach_skips_nic_detach_when_marker_present( + self, m_detach, m_report_ready, m_dhcp, m_fallback_if, m_isfile): + """Skip wait for nic detach if it already happened.""" + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + + m_isfile.return_value = True + dsa._wait_for_all_nics_ready() + m_fallback_if.return_value = "Dummy interface" + self.assertEqual(0, m_report_ready.call_count) + self.assertEqual(0, m_dhcp.call_count) + self.assertEqual(0, m_detach.call_count) + + @mock.patch(MOCKPATH + 'DataSourceAzure.wait_for_link_up', autospec=True) + @mock.patch('cloudinit.sources.helpers.netlink.wait_for_nic_attach_event') + @mock.patch('cloudinit.sources.net.find_fallback_nic') + @mock.patch(MOCKPATH + 'get_metadata_from_imds') + @mock.patch(MOCKPATH + 'EphemeralDHCPv4') + @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach') + @mock.patch('os.path.isfile') + def test_wait_for_nic_attach_if_no_fallback_interface( + self, m_isfile, m_detach, m_dhcpv4, m_imds, m_fallback_if, + m_attach, m_link_up): + """Wait for nic attach if we do not have a fallback interface""" + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + lease = { + 'interface': 'eth9', 'fixed-address': '192.168.2.9', + 'routers': '192.168.2.1', 'subnet-mask': 
'255.255.255.0', + 'unknown-245': '624c3620'} + + m_isfile.return_value = True + m_attach.return_value = "eth0" + dhcp_ctx = mock.MagicMock(lease=lease) + dhcp_ctx.obtain_lease.return_value = lease + m_dhcpv4.return_value = dhcp_ctx + m_imds.return_value = IMDS_NETWORK_METADATA + m_fallback_if.return_value = None + + dsa._wait_for_all_nics_ready() + + self.assertEqual(0, m_detach.call_count) + self.assertEqual(1, m_attach.call_count) + self.assertEqual(1, m_dhcpv4.call_count) + self.assertEqual(1, m_imds.call_count) + self.assertEqual(1, m_link_up.call_count) + m_link_up.assert_called_with(mock.ANY, "eth0") + + @mock.patch(MOCKPATH + 'DataSourceAzure.wait_for_link_up') + @mock.patch('cloudinit.sources.helpers.netlink.wait_for_nic_attach_event') + @mock.patch('cloudinit.sources.net.find_fallback_nic') + @mock.patch(MOCKPATH + 'get_metadata_from_imds') + @mock.patch(MOCKPATH + 'EphemeralDHCPv4') + @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach') + @mock.patch('os.path.isfile') + def test_wait_for_nic_attach_multinic_attach( + self, m_isfile, m_detach, m_dhcpv4, m_imds, m_fallback_if, + m_attach, m_link_up): + """Wait for nic attach if we do not have a fallback interface""" + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + lease = { + 'interface': 'eth9', 'fixed-address': '192.168.2.9', + 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', + 'unknown-245': '624c3620'} + m_attach_call_count = 0 + + def nic_attach_ret(nl_sock, nics_found): + nonlocal m_attach_call_count + if m_attach_call_count == 0: + m_attach_call_count = m_attach_call_count + 1 + return "eth0" + return "eth1" + + def network_metadata_ret(ifname, retries, type): + # Simulate two NICs by adding the same one twice. 
+ md = IMDS_NETWORK_METADATA + md['interface'].append(md['interface'][0]) + if ifname == "eth0": + return md + raise requests.Timeout('Fake connection timeout') + + m_isfile.return_value = True + m_attach.side_effect = nic_attach_ret + dhcp_ctx = mock.MagicMock(lease=lease) + dhcp_ctx.obtain_lease.return_value = lease + m_dhcpv4.return_value = dhcp_ctx + m_imds.side_effect = network_metadata_ret + m_fallback_if.return_value = None + + dsa._wait_for_all_nics_ready() + + self.assertEqual(0, m_detach.call_count) + self.assertEqual(2, m_attach.call_count) + # DHCP and network metadata calls will only happen on the primary NIC. + self.assertEqual(1, m_dhcpv4.call_count) + self.assertEqual(1, m_imds.call_count) + self.assertEqual(2, m_link_up.call_count) + + @mock.patch('cloudinit.distros.networking.LinuxNetworking.try_set_link_up') + def test_wait_for_link_up_returns_if_already_up( + self, m_is_link_up): + """Waiting for link to be up should return immediately if the link is + already up.""" + + distro_cls = distros.fetch('ubuntu') + distro = distro_cls('ubuntu', {}, self.paths) + dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths) + m_is_link_up.return_value = True + + dsa.wait_for_link_up("eth0") + self.assertEqual(1, m_is_link_up.call_count) + + @mock.patch(MOCKPATH + 'util.write_file') + @mock.patch('cloudinit.net.read_sys_net') + @mock.patch('cloudinit.distros.networking.LinuxNetworking.try_set_link_up') + def test_wait_for_link_up_writes_to_device_file( + self, m_is_link_up, m_read_sys_net, m_writefile): + """Waiting for link to be up should return immediately if the link is + already up.""" + + distro_cls = distros.fetch('ubuntu') + distro = distro_cls('ubuntu', {}, self.paths) + dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths) + + callcount = 0 + + def linkup(key): + nonlocal callcount + if callcount == 0: + callcount += 1 + return False + return True + + m_is_link_up.side_effect = linkup + + dsa.wait_for_link_up("eth0") + 
self.assertEqual(2, m_is_link_up.call_count) + self.assertEqual(1, m_read_sys_net.call_count) + self.assertEqual(2, m_writefile.call_count) + + @mock.patch('cloudinit.sources.helpers.netlink.' + 'create_bound_netlink_socket') + def test_wait_for_all_nics_ready_raises_if_socket_fails(self, m_socket): + """Waiting for all nics should raise exception if netlink socket + creation fails.""" + + m_socket.side_effect = netlink.NetlinkCreateSocketError + distro_cls = distros.fetch('ubuntu') + distro = distro_cls('ubuntu', {}, self.paths) + dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths) + + self.assertRaises(netlink.NetlinkCreateSocketError, + dsa._wait_for_all_nics_ready) + # dsa._wait_for_all_nics_ready() + + @mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network') @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') @mock.patch('cloudinit.sources.helpers.netlink.' @@ -2330,6 +2720,24 @@ class TestPreprovisioningPollIMDS(CiTestCase): self.assertEqual(3, m_dhcpv4.call_count, 'Expected 3 DHCP calls') self.assertEqual(4, self.tries, 'Expected 4 total reads from IMDS') + @mock.patch('os.path.isfile') + def test_poll_imds_skips_dhcp_if_ctx_present( + self, m_isfile, report_ready_func, fake_resp, m_media_switch, + m_dhcp, m_net): + """The poll_imds function should reuse the dhcp ctx if it is already + present. This happens when we wait for nic to be hot-attached before + polling for reprovisiondata. 
Note that if this ctx is set when + _poll_imds is called, then it is not expected to be waiting for + media_disconnect_connect either.""" + report_file = self.tmp_path('report_marker', self.tmp) + m_isfile.return_value = True + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa._ephemeral_dhcp_ctx = "Dummy dhcp ctx" + with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file): + dsa._poll_imds() + self.assertEqual(0, m_dhcp.call_count) + self.assertEqual(0, m_media_switch.call_count) + def test_does_not_poll_imds_report_ready_when_marker_file_exists( self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net): """poll_imds should not call report ready when the reported ready @@ -2390,7 +2798,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): @mock.patch(MOCKPATH + 'util.is_FreeBSD') @mock.patch('cloudinit.sources.helpers.netlink.' 'wait_for_media_disconnect_connect') -@mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network') +@mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network', autospec=True) @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') @mock.patch('requests.Session.request') class TestAzureDataSourcePreprovisioning(CiTestCase): @@ -2412,7 +2820,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): m_dhcp.return_value = [{ 'interface': 'eth9', 'fixed-address': '192.168.2.9', 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0'}] - url = 'http://{0}/metadata/reprovisiondata?api-version=2017-04-02' + url = 'http://{0}/metadata/reprovisiondata?api-version=2019-06-01' host = "169.254.169.254" full_url = url.format(host) m_request.return_value = mock.MagicMock(status_code=200, text="ovf", @@ -2445,7 +2853,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): 'interface': 'eth9', 'fixed-address': '192.168.2.9', 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', 'unknown-245': '624c3620'}] - url = 'http://{0}/metadata/reprovisiondata?api-version=2017-04-02' + url = 
'http://{0}/metadata/reprovisiondata?api-version=2019-06-01' host = "169.254.169.254" full_url = url.format(host) hostname = "myhost" diff --git a/tools/.github-cla-signers b/tools/.github-cla-signers index 9b594a44..1e0c3ea4 100644 --- a/tools/.github-cla-signers +++ b/tools/.github-cla-signers @@ -1,6 +1,7 @@ ader1990 AlexBaranowski Aman306 +aswinrajamannar beezly bipinbachhao BirknerAlex -- cgit v1.2.3