From f0bc02d7e221c9aa5982b267739481420c761ead Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Tue, 9 Oct 2018 21:46:35 +0000 Subject: instance-data: Add standard keys platform and subplatform. Refactor ec2. Add the following instance-data.json standardized keys: * v1._beta_keys: List any v1 keys in beta development, e.g. ['subplatform']. * v1.public_ssh_keys: List of any cloud-provided ssh keys for the instance. * v1.platform: String representing the cloud platform api supporting the datasource. For example: 'ec2' for aws, aliyun and brightbox cloud names. * v1.subplatform: String with more details about the source of the metadata consumed. For example, metadata uri, config drive device path or seed directory. To support the new platform and subplatform standardized instance-data, DataSource and its subclasses grew platform and subplatform attributes. The platform attribute defaults to the lowercase string datasource name at self.dsname. This method is overridden in NoCloud, Ec2 and ConfigDrive datasources. The subplatform attribute calls a _get_subplatform method which will return a string containing a simple slug for subplatform type such as metadata, seed-dir or config-drive followed by a detailed uri, device or directory path where the datasource consumed its configuration. As part of this work, DatasourceEC2 methods _get_data and _crawl_metadata have been refactored for a few reasons: - crawl_metadata is now a read-only operation, persisting no attributes on the datasource instance and returns a dictionary of consumed metadata. - crawl_metadata now closely represents the raw stucture of the ec2 metadata consumed, so that end-users can leverage public ec2 metadata documentation where possible. - crawl_metadata adds a '_metadata_api_version' key to the crawled ds.metadata to advertise what version of EC2's api was consumed by cloud-init. - _get_data now does all the processing of crawl_metadata and saves datasource instance attributes userdata_raw, metadata etc. Additional drive-bys: * unit test rework for test_altcloud and test_azure to simplify mocks and make use of existing util and test_helpers functions. --- cloudinit/sources/DataSourceAzure.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 783445e1..39391d01 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -351,6 +351,14 @@ class DataSourceAzure(sources.DataSource): metadata['public-keys'] = key_value or pubkeys_from_crt_files(fp_files) return metadata + def _get_subplatform(self): + """Return the subplatform metadata source details.""" + if self.seed.startswith('/dev'): + subplatform_type = 'config-disk' + else: + subplatform_type = 'seed-dir' + return '%s (%s)' % (subplatform_type, self.seed) + def crawl_metadata(self): """Walk all instance metadata sources returning a dict on success. -- cgit v1.2.3 From 1d5e9aefdab06a2574d78e644deed6c6fa1da171 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Wed, 17 Oct 2018 18:47:35 +0000 Subject: azure: Add apply_network_config option to disable network from IMDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Azure generates network configuration from the IMDS service and removes any preexisting hotplug network scripts which exist in Azure cloud images. Add a datasource configuration option which allows for writing a default network configuration which sets up dhcp on eth0 and leave the hotplug handling to the cloud-image scripts. To disable network-config from Azure IMDS, add the following to /etc/cloud/cloud.cfg.d/99-azure-no-imds-network.cfg: datasource:   Azure:     apply_network_config: False LP: #1798424 --- cloudinit/sources/DataSourceAzure.py | 11 +++++- doc/rtd/topics/datasources/azure.rst | 46 ++++++++++++++++++++++ tests/unittests/test_datasource/test_azure.py | 56 +++++++++++++++++++++++++-- 3 files changed, 107 insertions(+), 6 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 39391d01..d0358e96 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -207,7 +207,9 @@ BUILTIN_DS_CONFIG = { }, 'disk_aliases': {'ephemeral0': RESOURCE_DISK_PATH}, 'dhclient_lease_file': LEASE_FILE, + 'apply_network_config': True, # Use IMDS published network configuration } +# RELEASE_BLOCKER: Xenial and earlier apply_network_config default is False BUILTIN_CLOUD_CONFIG = { 'disk_setup': { @@ -458,7 +460,8 @@ class DataSourceAzure(sources.DataSource): except sources.InvalidMetaDataException as e: LOG.warning('Could not crawl Azure metadata: %s', e) return False - if self.distro and self.distro.name == 'ubuntu': + if (self.distro and self.distro.name == 'ubuntu' and + self.ds_cfg.get('apply_network_config')): maybe_remove_ubuntu_network_config_scripts() # Process crawled data and augment with various config defaults @@ -619,7 +622,11 @@ class DataSourceAzure(sources.DataSource): the blacklisted devices. """ if not self._network_config: - self._network_config = parse_network_config(self._metadata_imds) + if self.ds_cfg.get('apply_network_config'): + nc_src = self._metadata_imds + else: + nc_src = None + self._network_config = parse_network_config(nc_src) return self._network_config diff --git a/doc/rtd/topics/datasources/azure.rst b/doc/rtd/topics/datasources/azure.rst index 559011ef..f73c3694 100644 --- a/doc/rtd/topics/datasources/azure.rst +++ b/doc/rtd/topics/datasources/azure.rst @@ -57,6 +57,52 @@ in order to use waagent.conf with cloud-init, the following settings are recomme ResourceDisk.MountPoint=/mnt +Configuration +------------- +The following configuration can be set for the datasource in system +configuration (in `/etc/cloud/cloud.cfg` or `/etc/cloud/cloud.cfg.d/`). + +The settings that may be configured are: + + * **agent_command**: Either __builtin__ (default) or a command to run to getcw + metadata. If __builtin__, get metadata from walinuxagent. Otherwise run the + provided command to obtain metadata. + * **apply_network_config**: Boolean set to True to use network configuration + described by Azure's IMDS endpoint instead of fallback network config of + dhcp on eth0. Default is True. For Ubuntu 16.04 or earlier, default is False. + * **data_dir**: Path used to read metadata files and write crawled data. + * **dhclient_lease_file**: The fallback lease file to source when looking for + custom DHCP option 245 from Azure fabric. + * **disk_aliases**: A dictionary defining which device paths should be + interpreted as ephemeral images. See cc_disk_setup module for more info. + * **hostname_bounce**: A dictionary Azure hostname bounce behavior to react to + metadata changes. + * **hostname_bounce**: A dictionary Azure hostname bounce behavior to react to + metadata changes. Azure will throttle ifup/down in some cases after metadata + has been updated to inform dhcp server about updated hostnames. + * **set_hostname**: Boolean set to True when we want Azure to set the hostname + based on metadata. + +An example configuration with the default values is provided below: + +.. sourcecode:: yaml + + datasource: + Azure: + agent_command: __builtin__ + apply_network_config: true + data_dir: /var/lib/waagent + dhclient_lease_file: /var/lib/dhcp/dhclient.eth0.leases + disk_aliases: + ephemeral0: /dev/disk/cloud/azure_resource + hostname_bounce: + interface: eth0 + command: builtin + policy: true + hostname_command: hostname + set_hostname: true + + Userdata -------- Userdata is provided to cloud-init inside the ovf-env.xml file. Cloud-init diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 0f4b7bf7..cd6e7e74 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -256,7 +256,8 @@ scbus-1 on xpt0 bus 0 ]) return dsaz - def _get_ds(self, data, agent_command=None, distro=None): + def _get_ds(self, data, agent_command=None, distro=None, + apply_network=None): def dsdevs(): return data.get('dsdevs', []) @@ -312,6 +313,8 @@ scbus-1 on xpt0 bus 0 data.get('sys_cfg', {}), distro=distro, paths=self.paths) if agent_command is not None: dsrc.ds_cfg['agent_command'] = agent_command + if apply_network is not None: + dsrc.ds_cfg['apply_network_config'] = apply_network return dsrc @@ -434,14 +437,26 @@ fdescfs /dev/fd fdescfs rw 0 0 def test_get_data_on_ubuntu_will_remove_network_scripts(self): """get_data will remove ubuntu net scripts on Ubuntu distro.""" + sys_cfg = {'datasource': {'Azure': {'apply_network_config': True}}} odata = {'HostName': "myhost", 'UserName': "myuser"} data = {'ovfcontent': construct_valid_ovf_env(data=odata), - 'sys_cfg': {}} + 'sys_cfg': sys_cfg} dsrc = self._get_ds(data, distro='ubuntu') dsrc.get_data() self.m_remove_ubuntu_network_scripts.assert_called_once_with() + def test_get_data_on_ubuntu_will_not_remove_network_scripts_disabled(self): + """When apply_network_config false, do not remove scripts on Ubuntu.""" + sys_cfg = {'datasource': {'Azure': {'apply_network_config': False}}} + odata = {'HostName': "myhost", 'UserName': "myuser"} + data = {'ovfcontent': construct_valid_ovf_env(data=odata), + 'sys_cfg': sys_cfg} + + dsrc = self._get_ds(data, distro='ubuntu') + dsrc.get_data() + self.m_remove_ubuntu_network_scripts.assert_not_called() + def test_crawl_metadata_returns_structured_data_and_caches_nothing(self): """Return all structured metadata and cache no class attributes.""" yaml_cfg = "{agent_command: my_command}\n" @@ -523,8 +538,10 @@ fdescfs /dev/fd fdescfs rw 0 0 def test_network_config_set_from_imds(self): """Datasource.network_config returns IMDS network data.""" + sys_cfg = {'datasource': {'Azure': {'apply_network_config': True}}} odata = {} - data = {'ovfcontent': construct_valid_ovf_env(data=odata)} + data = {'ovfcontent': construct_valid_ovf_env(data=odata), + 'sys_cfg': sys_cfg} expected_network_config = { 'ethernets': { 'eth0': {'set-name': 'eth0', @@ -803,9 +820,10 @@ fdescfs /dev/fd fdescfs rw 0 0 @mock.patch('cloudinit.net.generate_fallback_config') def test_imds_network_config(self, mock_fallback): """Network config is generated from IMDS network data when present.""" + sys_cfg = {'datasource': {'Azure': {'apply_network_config': True}}} odata = {'HostName': "myhost", 'UserName': "myuser"} data = {'ovfcontent': construct_valid_ovf_env(data=odata), - 'sys_cfg': {}} + 'sys_cfg': sys_cfg} dsrc = self._get_ds(data) ret = dsrc.get_data() @@ -821,6 +839,36 @@ fdescfs /dev/fd fdescfs rw 0 0 self.assertEqual(expected_cfg, dsrc.network_config) mock_fallback.assert_not_called() + @mock.patch('cloudinit.net.get_interface_mac') + @mock.patch('cloudinit.net.get_devicelist') + @mock.patch('cloudinit.net.device_driver') + @mock.patch('cloudinit.net.generate_fallback_config') + def test_imds_network_ignored_when_apply_network_config_false( + self, mock_fallback, mock_dd, mock_devlist, mock_get_mac): + """When apply_network_config is False, use fallback instead of IMDS.""" + sys_cfg = {'datasource': {'Azure': {'apply_network_config': False}}} + odata = {'HostName': "myhost", 'UserName': "myuser"} + data = {'ovfcontent': construct_valid_ovf_env(data=odata), + 'sys_cfg': sys_cfg} + fallback_config = { + 'version': 1, + 'config': [{ + 'type': 'physical', 'name': 'eth0', + 'mac_address': '00:11:22:33:44:55', + 'params': {'driver': 'hv_netsvc'}, + 'subnets': [{'type': 'dhcp'}], + }] + } + mock_fallback.return_value = fallback_config + + mock_devlist.return_value = ['eth0'] + mock_dd.return_value = ['hv_netsvc'] + mock_get_mac.return_value = '00:11:22:33:44:55' + + dsrc = self._get_ds(data) + self.assertTrue(dsrc.get_data()) + self.assertEqual(dsrc.network_config, fallback_config) + @mock.patch('cloudinit.net.get_interface_mac') @mock.patch('cloudinit.net.get_devicelist') @mock.patch('cloudinit.net.device_driver') -- cgit v1.2.3 From 907395104bb5850d221924365102cc5ab0eca2f1 Mon Sep 17 00:00:00 2001 From: asakkurr Date: Wed, 31 Oct 2018 20:19:15 +0000 Subject: azure: report ready to fabric after reprovision and reduce logging When reusing a preprovisioned VM, report ready to Azure fabric as soon as we get the reprovision data and the goal state so that we are not delayed by the cloud-init stage switch, saving 2-3 seconds. Also reduce logging when polling IMDS for reprovision data. LP: #1799594 --- cloudinit/sources/DataSourceAzure.py | 15 ++++++-- cloudinit/url_helper.py | 17 +++++---- tests/unittests/test_datasource/test_azure.py | 51 +++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 9 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index d0358e96..8642915e 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -267,6 +267,7 @@ class DataSourceAzure(sources.DataSource): dsname = 'Azure' _negotiated = False _metadata_imds = sources.UNSET + lease_info = None def __init__(self, sys_cfg, distro, paths): sources.DataSource.__init__(self, sys_cfg, distro, paths) @@ -406,8 +407,10 @@ class DataSourceAzure(sources.DataSource): LOG.warning("%s was not mountable", cdev) continue + should_report_ready_after_reprovision = False if reprovision or self._should_reprovision(ret): ret = self._reprovision() + should_report_ready_after_reprovision = True imds_md = get_metadata_from_imds( self.fallback_interface, retries=3) (md, userdata_raw, cfg, files) = ret @@ -434,6 +437,11 @@ class DataSourceAzure(sources.DataSource): crawled_data['metadata']['random_seed'] = seed crawled_data['metadata']['instance-id'] = util.read_dmi_data( 'system-uuid') + + if should_report_ready_after_reprovision: + LOG.info("Reporting ready to Azure after getting ReprovisionData") + self._report_ready(lease=self.lease_info) + return crawled_data def _is_platform_viable(self): @@ -522,6 +530,7 @@ class DataSourceAzure(sources.DataSource): while True: try: with EphemeralDHCPv4() as lease: + self.lease_info = lease if report_ready: path = REPORTED_READY_MARKER_FILE LOG.info( @@ -531,13 +540,13 @@ class DataSourceAzure(sources.DataSource): self._report_ready(lease=lease) report_ready = False return readurl(url, timeout=1, headers=headers, - exception_cb=exc_cb, infinite=True).contents + exception_cb=exc_cb, infinite=True, + log_req_resp=False).contents except UrlError: pass def _report_ready(self, lease): - """Tells the fabric provisioning has completed - before we go into our polling loop.""" + """Tells the fabric provisioning has completed """ try: get_metadata_from_fabric(None, lease['unknown-245']) except Exception: diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 8067979e..cf57dbd5 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -199,7 +199,7 @@ def _get_ssl_args(url, ssl_details): def readurl(url, data=None, timeout=None, retries=0, sec_between=1, headers=None, headers_cb=None, ssl_details=None, check_status=True, allow_redirects=True, exception_cb=None, - session=None, infinite=False): + session=None, infinite=False, log_req_resp=True): url = _cleanurl(url) req_args = { 'url': url, @@ -256,9 +256,11 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, continue filtered_req_args[k] = v try: - LOG.debug("[%s/%s] open '%s' with %s configuration", i, - "infinite" if infinite else manual_tries, url, - filtered_req_args) + + if log_req_resp: + LOG.debug("[%s/%s] open '%s' with %s configuration", i, + "infinite" if infinite else manual_tries, url, + filtered_req_args) if session is None: session = requests.Session() @@ -294,8 +296,11 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, break if (infinite and sec_between > 0) or \ (i + 1 < manual_tries and sec_between > 0): - LOG.debug("Please wait %s seconds while we wait to try again", - sec_between) + + if log_req_resp: + LOG.debug( + "Please wait %s seconds while we wait to try again", + sec_between) time.sleep(sec_between) if excps: raise excps[-1] diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index cd6e7e74..4c5c6c12 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -513,6 +513,57 @@ fdescfs /dev/fd fdescfs rw 0 0 dsrc.crawl_metadata() self.assertEqual(str(cm.exception), error_msg) + @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file') + @mock.patch( + 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready') + @mock.patch('cloudinit.sources.DataSourceAzure.DataSourceAzure._poll_imds') + def test_crawl_metadata_on_reprovision_reports_ready( + self, poll_imds_func, + report_ready_func, + m_write): + """If reprovisioning, report ready at the end""" + ovfenv = construct_valid_ovf_env( + platform_settings={"PreprovisionedVm": "True"}) + + data = {'ovfcontent': ovfenv, + 'sys_cfg': {}} + dsrc = self._get_ds(data) + poll_imds_func.return_value = ovfenv + dsrc.crawl_metadata() + self.assertEqual(1, report_ready_func.call_count) + + @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file') + @mock.patch( + 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready') + @mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network') + @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') + @mock.patch('cloudinit.sources.DataSourceAzure.readurl') + def test_crawl_metadata_on_reprovision_reports_ready_using_lease( + self, m_readurl, m_dhcp, + m_net, report_ready_func, + m_write): + """If reprovisioning, report ready using the obtained lease""" + ovfenv = construct_valid_ovf_env( + platform_settings={"PreprovisionedVm": "True"}) + + data = {'ovfcontent': ovfenv, + 'sys_cfg': {}} + dsrc = self._get_ds(data) + + lease = { + 'interface': 'eth9', 'fixed-address': '192.168.2.9', + 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', + 'unknown-245': '624c3620'} + m_dhcp.return_value = [lease] + + reprovision_ovfenv = construct_valid_ovf_env() + m_readurl.return_value = url_helper.StringResponse( + reprovision_ovfenv.encode('utf-8')) + + dsrc.crawl_metadata() + self.assertEqual(2, report_ready_func.call_count) + report_ready_func.assert_called_with(lease=lease) + def test_waagent_d_has_0700_perms(self): # we expect /var/lib/waagent to be created 0700 dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) -- cgit v1.2.3 From 58476e719bad7dbe1f0bd09a61ff484ad17d8e55 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Thu, 1 Nov 2018 22:50:07 +0000 Subject: azure: remove /etc/netplan/90-hotplug-azure.yaml when net from IMDS There was a typo in the seeded filename s/azure-hotplug/hotplug-azure/. --- cloudinit/sources/DataSourceAzure.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 8642915e..7bdd43d8 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -57,7 +57,7 @@ IMDS_URL = "http://169.254.169.254/metadata/" # List of static scripts and network config artifacts created by # stock ubuntu suported images. UBUNTU_EXTENDED_NETWORK_SCRIPTS = [ - '/etc/netplan/90-azure-hotplug.yaml', + '/etc/netplan/90-hotplug-azure.yaml', '/usr/local/sbin/ephemeral_eth.sh', '/etc/udev/rules.d/10-net-device-added.rules', '/run/network/interfaces.ephemeral.d', @@ -1211,7 +1211,7 @@ def maybe_remove_ubuntu_network_config_scripts(paths=None): additional interfaces which get attached by a customer at some point after initial boot. Since the Azure datasource can now regenerate network configuration as metadata reports these new devices, we no longer - want the udev rules or netplan's 90-azure-hotplug.yaml to configure + want the udev rules or netplan's 90-hotplug-azure.yaml to configure networking on eth1 or greater as it might collide with cloud-init's configuration. -- cgit v1.2.3 From d910ecd15de642d73a36e935704e54370f93c45b Mon Sep 17 00:00:00 2001 From: asakkurr Date: Mon, 12 Nov 2018 17:16:09 +0000 Subject: azure: fix regression introduced when persisting ephemeral dhcp lease In commitish 9073951 azure datasource tried to leverage stale DHCP information obtained from EphemeralDHCPv4 context manager to report updated provisioning status to the fabric earlier in the boot process. Unfortunately the stale ephemeral network configuration had already been torn down in preparation to bring up IMDS network config so the report attempt failed on timeout. This branch introduces obtain_lease and clean_network public methods on EphemeralDHCPv4 to allow for setup and teardown of ephemeral network configuration without using a context manager. Azure datasource now uses this to persist ephemeral network configuration across multiple contexts during provisioning to avoid multiple DHCP roundtrips. --- cloudinit/net/dhcp.py | 42 ++++++++++++++++++------ cloudinit/sources/DataSourceAzure.py | 47 ++++++++++++++++----------- tests/unittests/test_datasource/test_azure.py | 3 +- 3 files changed, 62 insertions(+), 30 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py index 12cf5097..bdc5799f 100644 --- a/cloudinit/net/dhcp.py +++ b/cloudinit/net/dhcp.py @@ -40,34 +40,56 @@ class EphemeralDHCPv4(object): def __init__(self, iface=None): self.iface = iface self._ephipv4 = None + self.lease = None def __enter__(self): + """Setup sandboxed dhcp context.""" + return self.obtain_lease() + + def __exit__(self, excp_type, excp_value, excp_traceback): + """Teardown sandboxed dhcp context.""" + self.clean_network() + + def clean_network(self): + """Exit _ephipv4 context to teardown of ip configuration performed.""" + if self.lease: + self.lease = None + if not self._ephipv4: + return + self._ephipv4.__exit__(None, None, None) + + def obtain_lease(self): + """Perform dhcp discovery in a sandboxed environment if possible. + + @return: A dict representing dhcp options on the most recent lease + obtained from the dhclient discovery if run, otherwise an error + is raised. + + @raises: NoDHCPLeaseError if no leases could be obtained. + """ + if self.lease: + return self.lease try: leases = maybe_perform_dhcp_discovery(self.iface) except InvalidDHCPLeaseFileError: raise NoDHCPLeaseError() if not leases: raise NoDHCPLeaseError() - lease = leases[-1] + self.lease = leases[-1] LOG.debug("Received dhcp lease on %s for %s/%s", - lease['interface'], lease['fixed-address'], - lease['subnet-mask']) + self.lease['interface'], self.lease['fixed-address'], + self.lease['subnet-mask']) nmap = {'interface': 'interface', 'ip': 'fixed-address', 'prefix_or_mask': 'subnet-mask', 'broadcast': 'broadcast-address', 'router': 'routers'} - kwargs = dict([(k, lease.get(v)) for k, v in nmap.items()]) + kwargs = dict([(k, self.lease.get(v)) for k, v in nmap.items()]) if not kwargs['broadcast']: kwargs['broadcast'] = bcip(kwargs['prefix_or_mask'], kwargs['ip']) ephipv4 = EphemeralIPv4Network(**kwargs) ephipv4.__enter__() self._ephipv4 = ephipv4 - return lease - - def __exit__(self, excp_type, excp_value, excp_traceback): - if not self._ephipv4: - return - self._ephipv4.__exit__(excp_type, excp_value, excp_traceback) + return self.lease def maybe_perform_dhcp_discovery(nic=None): diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 7bdd43d8..5ec6096f 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -267,7 +267,6 @@ class DataSourceAzure(sources.DataSource): dsname = 'Azure' _negotiated = False _metadata_imds = sources.UNSET - lease_info = None def __init__(self, sys_cfg, distro, paths): sources.DataSource.__init__(self, sys_cfg, distro, paths) @@ -281,6 +280,7 @@ class DataSourceAzure(sources.DataSource): self._network_config = None # Regenerate network config new_instance boot and every boot self.update_events['network'].add(EventType.BOOT) + self._ephemeral_dhcp_ctx = None def __str__(self): root = sources.DataSource.__str__(self) @@ -407,10 +407,9 @@ class DataSourceAzure(sources.DataSource): LOG.warning("%s was not mountable", cdev) continue - should_report_ready_after_reprovision = False - if reprovision or self._should_reprovision(ret): + perform_reprovision = reprovision or self._should_reprovision(ret) + if perform_reprovision: ret = self._reprovision() - should_report_ready_after_reprovision = True imds_md = get_metadata_from_imds( self.fallback_interface, retries=3) (md, userdata_raw, cfg, files) = ret @@ -438,9 +437,16 @@ class DataSourceAzure(sources.DataSource): crawled_data['metadata']['instance-id'] = util.read_dmi_data( 'system-uuid') - if should_report_ready_after_reprovision: + if perform_reprovision: LOG.info("Reporting ready to Azure after getting ReprovisionData") - self._report_ready(lease=self.lease_info) + use_cached_ephemeral = (net.is_up(self.fallback_interface) and + getattr(self, '_ephemeral_dhcp_ctx', None)) + if use_cached_ephemeral: + self._report_ready(lease=self._ephemeral_dhcp_ctx.lease) + self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral + else: + with EphemeralDHCPv4() as lease: + self._report_ready(lease=lease) return crawled_data @@ -529,20 +535,23 @@ class DataSourceAzure(sources.DataSource): while True: try: - with EphemeralDHCPv4() as lease: - self.lease_info = lease - if report_ready: - path = REPORTED_READY_MARKER_FILE - LOG.info( - "Creating a marker file to report ready: %s", path) - util.write_file(path, "{pid}: {time}\n".format( - pid=os.getpid(), time=time())) - self._report_ready(lease=lease) - report_ready = False - return readurl(url, timeout=1, headers=headers, - exception_cb=exc_cb, infinite=True, - log_req_resp=False).contents + # Save our EphemeralDHCPv4 context so we avoid repeated dhcp + self._ephemeral_dhcp_ctx = EphemeralDHCPv4() + lease = self._ephemeral_dhcp_ctx.obtain_lease() + if report_ready: + path = REPORTED_READY_MARKER_FILE + LOG.info( + "Creating a marker file to report ready: %s", path) + util.write_file(path, "{pid}: {time}\n".format( + pid=os.getpid(), time=time())) + self._report_ready(lease=lease) + report_ready = False + return readurl(url, timeout=1, headers=headers, + exception_cb=exc_cb, infinite=True, + log_req_resp=False).contents except UrlError: + # Teardown our EphemeralDHCPv4 context on failure as we retry + self._ephemeral_dhcp_ctx.clean_network() pass def _report_ready(self, lease): diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 4c5c6c12..1dc69adb 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -513,6 +513,7 @@ fdescfs /dev/fd fdescfs rw 0 0 dsrc.crawl_metadata() self.assertEqual(str(cm.exception), error_msg) + @mock.patch('cloudinit.sources.DataSourceAzure.EphemeralDHCPv4') @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file') @mock.patch( 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready') @@ -520,7 +521,7 @@ fdescfs /dev/fd fdescfs rw 0 0 def test_crawl_metadata_on_reprovision_reports_ready( self, poll_imds_func, report_ready_func, - m_write): + m_write, m_dhcp): """If reprovisioning, report ready at the end""" ovfenv = construct_valid_ovf_env( platform_settings={"PreprovisionedVm": "True"}) -- cgit v1.2.3 From 6f9512049bbb594c3f01ffcd2ab25ae4e016f01e Mon Sep 17 00:00:00 2001 From: Jason Zions Date: Mon, 12 Nov 2018 18:43:42 +0000 Subject: azure: Accept variation in error msg from mount for ntfs volumes If Azure detects an ntfs filesystem type during mount attempt, it should still report the resource device as reformattable. There are slight differences in error message format on RedHat and SuSE. This patch simplifies the expected error match to work on both distributions. LP: #1799338 --- cloudinit/sources/DataSourceAzure.py | 2 +- tests/unittests/test_datasource/test_azure.py | 29 +++++++++++++-------------- 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 5ec6096f..6e1797ea 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -725,7 +725,7 @@ def can_dev_be_reformatted(devpath, preserve_ntfs): file_count = util.mount_cb(cand_path, count_files, mtype="ntfs", update_env_for_mount={'LANG': 'C'}) except util.MountFailedError as e: - if "mount: unknown filesystem type 'ntfs'" in str(e): + if "unknown filesystem type 'ntfs'" in str(e): return True, (bmsg + ' but this system cannot mount NTFS,' ' assuming there are no important files.' ' Formatting allowed.') diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 1dc69adb..8ad4368c 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -1511,21 +1511,20 @@ class TestCanDevBeReformatted(CiTestCase): '/dev/sda1': {'num': 1, 'fs': 'ntfs', 'files': []} }}}) - err = ("Unexpected error while running command.\n", - "Command: ['mount', '-o', 'ro,sync', '-t', 'auto', ", - "'/dev/sda1', '/fake-tmp/dir']\n" - "Exit code: 32\n" - "Reason: -\n" - "Stdout: -\n" - "Stderr: mount: unknown filesystem type 'ntfs'") - self.m_mount_cb.side_effect = MountFailedError( - 'Failed mounting %s to %s due to: %s' % - ('/dev/sda', '/fake-tmp/dir', err)) - - value, msg = dsaz.can_dev_be_reformatted('/dev/sda', - preserve_ntfs=False) - self.assertTrue(value) - self.assertIn('cannot mount NTFS, assuming', msg) + error_msgs = [ + "Stderr: mount: unknown filesystem type 'ntfs'", # RHEL + "Stderr: mount: /dev/sdb1: unknown filesystem type 'ntfs'" # SLES + ] + + for err_msg in error_msgs: + self.m_mount_cb.side_effect = MountFailedError( + "Failed mounting %s to %s due to: \nUnexpected.\n%s" % + ('/dev/sda', '/fake-tmp/dir', err_msg)) + + value, msg = dsaz.can_dev_be_reformatted('/dev/sda', + preserve_ntfs=False) + self.assertTrue(value) + self.assertIn('cannot mount NTFS, assuming', msg) def test_never_destroy_ntfs_config_false(self): """Normally formattable situation with never_destroy_ntfs set.""" -- cgit v1.2.3 From 6062595b83e08e0f12e1fe6d8e367d8db9d91ef8 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Tue, 13 Nov 2018 03:14:58 +0000 Subject: azure: retry imds polling on requests.Timeout There is an infrequent race when the booting instance can hit the IMDS service before it is fully available. This results in a requests.ConnectTimeout being raised. Azure's retry_callback logic now retries on either 404s or Timeouts. LP:1800223 --- cloudinit/sources/DataSourceAzure.py | 18 +++------------- cloudinit/tests/test_url_helper.py | 25 +++++++++++++++++++++- cloudinit/url_helper.py | 14 +++++++++++++ tests/unittests/test_datasource/test_azure.py | 30 +++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 16 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 6e1797ea..9e8a1a8b 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -22,7 +22,7 @@ from cloudinit.event import EventType from cloudinit.net.dhcp import EphemeralDHCPv4 from cloudinit import sources from cloudinit.sources.helpers.azure import get_metadata_from_fabric -from cloudinit.url_helper import readurl, UrlError +from cloudinit.url_helper import UrlError, readurl, retry_on_url_exc from cloudinit import util LOG = logging.getLogger(__name__) @@ -526,13 +526,6 @@ class DataSourceAzure(sources.DataSource): report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE)) LOG.debug("Start polling IMDS") - def exc_cb(msg, exception): - if isinstance(exception, UrlError) and exception.code == 404: - return True - # If we get an exception while trying to call IMDS, we - # call DHCP and setup the ephemeral network to acquire the new IP. - return False - while True: try: # Save our EphemeralDHCPv4 context so we avoid repeated dhcp @@ -547,7 +540,7 @@ class DataSourceAzure(sources.DataSource): self._report_ready(lease=lease) report_ready = False return readurl(url, timeout=1, headers=headers, - exception_cb=exc_cb, infinite=True, + exception_cb=retry_on_url_exc, infinite=True, log_req_resp=False).contents except UrlError: # Teardown our EphemeralDHCPv4 context on failure as we retry @@ -1187,17 +1180,12 @@ def get_metadata_from_imds(fallback_nic, retries): def _get_metadata_from_imds(retries): - def retry_on_url_error(msg, exception): - if isinstance(exception, UrlError) and exception.code == 404: - return True # Continue retries - return False # Stop retries on all other exceptions - url = IMDS_URL + "instance?api-version=2017-12-01" headers = {"Metadata": "true"} try: response = readurl( url, timeout=1, headers=headers, retries=retries, - exception_cb=retry_on_url_error) + exception_cb=retry_on_url_exc) except Exception as e: LOG.debug('Ignoring IMDS instance metadata: %s', e) return {} diff --git a/cloudinit/tests/test_url_helper.py b/cloudinit/tests/test_url_helper.py index 113249d9..aa9f3ec1 100644 --- a/cloudinit/tests/test_url_helper.py +++ b/cloudinit/tests/test_url_helper.py @@ -1,10 +1,12 @@ # This file is part of cloud-init. See LICENSE file for license information. -from cloudinit.url_helper import oauth_headers, read_file_or_url +from cloudinit.url_helper import ( + NOT_FOUND, UrlError, oauth_headers, read_file_or_url, retry_on_url_exc) from cloudinit.tests.helpers import CiTestCase, mock, skipIf from cloudinit import util import httpretty +import requests try: @@ -64,3 +66,24 @@ class TestReadFileOrUrl(CiTestCase): result = read_file_or_url(url) self.assertEqual(result.contents, data) self.assertEqual(str(result), data.decode('utf-8')) + + +class TestRetryOnUrlExc(CiTestCase): + + def test_do_not_retry_non_urlerror(self): + """When exception is not UrlError return False.""" + myerror = IOError('something unexcpected') + self.assertFalse(retry_on_url_exc(msg='', exc=myerror)) + + def test_perform_retries_on_not_found(self): + """When exception is UrlError with a 404 status code return True.""" + myerror = UrlError(cause=RuntimeError( + 'something was not found'), code=NOT_FOUND) + self.assertTrue(retry_on_url_exc(msg='', exc=myerror)) + + def test_perform_retries_on_timeout(self): + """When exception is a requests.Timout return True.""" + myerror = UrlError(cause=requests.Timeout('something timed out')) + self.assertTrue(retry_on_url_exc(msg='', exc=myerror)) + +# vi: ts=4 expandtab diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index cf57dbd5..396d69ae 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -554,4 +554,18 @@ def oauth_headers(url, consumer_key, token_key, token_secret, consumer_secret, _uri, signed_headers, _body = client.sign(url) return signed_headers + +def retry_on_url_exc(msg, exc): + """readurl exception_cb that will retry on NOT_FOUND and Timeout. + + Returns False to raise the exception from readurl, True to retry. + """ + if not isinstance(exc, UrlError): + return False + if exc.code == NOT_FOUND: + return True + if exc.cause and isinstance(exc.cause, requests.Timeout): + return True + return False + # vi: ts=4 expandtab diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 8ad4368c..56484b27 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -17,6 +17,7 @@ import crypt import httpretty import json import os +import requests import stat import xml.etree.ElementTree as ET import yaml @@ -184,6 +185,35 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time self.logs.getvalue()) + @mock.patch('requests.Session.request') + @mock.patch('cloudinit.url_helper.time.sleep') + @mock.patch(MOCKPATH + 'net.is_up') + def test_get_metadata_from_imds_retries_on_timeout( + self, m_net_is_up, m_sleep, m_request): + """Retry IMDS network metadata on timeout errors.""" + + self.attempt = 0 + m_request.side_effect = requests.Timeout('Fake Connection Timeout') + + def retry_callback(request, uri, headers): + self.attempt += 1 + raise requests.Timeout('Fake connection timeout') + + httpretty.register_uri( + httpretty.GET, + dsaz.IMDS_URL + 'instance?api-version=2017-12-01', + body=retry_callback) + + m_net_is_up.return_value = True # skips dhcp + + self.assertEqual({}, dsaz.get_metadata_from_imds('eth9', retries=3)) + + m_net_is_up.assert_called_with('eth9') + self.assertEqual([mock.call(1)]*3, m_sleep.call_args_list) + self.assertIn( + "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time + self.logs.getvalue()) + class TestAzureDataSource(CiTestCase): -- cgit v1.2.3 From 8f812a15fde01173c0dd5b7e1a77b61031fd93e4 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Thu, 15 Nov 2018 22:55:42 +0000 Subject: azure: _poll_imds only retry on 404. Fail on Timeout Upon URL timeout, _poll_imds is expected to re-dhcp to get updated IP configuration. We don't want to indefinitely retry because the instance likely has invalid IP configuration. LP: #1803598 --- cloudinit/sources/DataSourceAzure.py | 9 ++++++- tests/unittests/test_datasource/test_azure.py | 34 ++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 7 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 9e8a1a8b..2a3e5677 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -526,6 +526,13 @@ class DataSourceAzure(sources.DataSource): report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE)) LOG.debug("Start polling IMDS") + def exc_cb(msg, exception): + if isinstance(exception, UrlError) and exception.code == 404: + return True + # If we get an exception while trying to call IMDS, we + # call DHCP and setup the ephemeral network to acquire the new IP. + return False + while True: try: # Save our EphemeralDHCPv4 context so we avoid repeated dhcp @@ -540,7 +547,7 @@ class DataSourceAzure(sources.DataSource): self._report_ready(lease=lease) report_ready = False return readurl(url, timeout=1, headers=headers, - exception_cb=retry_on_url_exc, infinite=True, + exception_cb=exc_cb, infinite=True, log_req_resp=False).contents except UrlError: # Teardown our EphemeralDHCPv4 context on failure as we retry diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 56484b27..5ea7ae5e 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -1687,22 +1687,44 @@ class TestPreprovisioningPollIMDS(CiTestCase): self.paths = helpers.Paths({'cloud_dir': self.tmp}) dsaz.BUILTIN_DS_CONFIG['data_dir'] = self.waagent_d - @mock.patch(MOCKPATH + 'util.write_file') - def test_poll_imds_calls_report_ready(self, write_f, report_ready_func, + @mock.patch(MOCKPATH + 'EphemeralDHCPv4') + def test_poll_imds_re_dhcp_on_timeout(self, m_dhcpv4, report_ready_func, fake_resp, m_dhcp, m_net): - """The poll_imds will call report_ready after creating marker file.""" - report_marker = self.tmp_path('report_marker', self.tmp) + """The poll_imds will retry DHCP on IMDS timeout.""" + report_file = self.tmp_path('report_marker', self.tmp) lease = { 'interface': 'eth9', 'fixed-address': '192.168.2.9', 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', 'unknown-245': '624c3620'} m_dhcp.return_value = [lease] + + dhcp_ctx = mock.MagicMock(lease=lease) + dhcp_ctx.obtain_lease.return_value = lease + m_dhcpv4.return_value = dhcp_ctx + + self.tries = 0 + + def fake_timeout_once(**kwargs): + self.tries += 1 + if self.tries == 1: + raise requests.Timeout('Fake connection timeout') + elif self.tries == 2: + response = requests.Response() + response.status_code = 404 + raise requests.exceptions.HTTPError( + "fake 404", response=response) + # Third try should succeed and stop retries or redhcp + return mock.MagicMock(status_code=200, text="good", content="good") + + fake_resp.side_effect = fake_timeout_once + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) - mock_path = (MOCKPATH + 'REPORTED_READY_MARKER_FILE') - with mock.patch(mock_path, report_marker): + with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file): dsa._poll_imds() self.assertEqual(report_ready_func.call_count, 1) report_ready_func.assert_called_with(lease=lease) + self.assertEqual(2, m_dhcpv4.call_count, 'Expected 2 DHCP calls') + self.assertEqual(3, self.tries, 'Expected 3 total reads from IMDS') def test_poll_imds_report_ready_false(self, report_ready_func, fake_resp, m_dhcp, m_net): -- cgit v1.2.3 From c7c395ce0f3d024243192947fee32d7fc6c063f5 Mon Sep 17 00:00:00 2001 From: Adam DePue Date: Thu, 29 Nov 2018 18:22:14 +0000 Subject: Azure: fix copy/paste error in error handling when reading azure ovf. Check the appropriate variables based on code review. Correcting what seems to be a copy/paste mistake for the error handling from a few lines above. --- cloudinit/sources/DataSourceAzure.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 2a3e5677..be82ec4d 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -953,12 +953,12 @@ def read_azure_ovf(contents): lambda n: n.localName == "LinuxProvisioningConfigurationSet") - if len(results) == 0: + if len(lpcs_nodes) == 0: raise NonAzureDataSource("No LinuxProvisioningConfigurationSet") - if len(results) > 1: + if len(lpcs_nodes) > 1: raise BrokenAzureDataSource("found '%d' %ss" % ("LinuxProvisioningConfigurationSet", - len(results))) + len(lpcs_nodes))) lpcs = lpcs_nodes[0] if not lpcs.hasChildNodes(): -- cgit v1.2.3 From bf7917159dbb292c9fcdef82b004e0f5ecb32c16 Mon Sep 17 00:00:00 2001 From: Tamilmani Manoharan Date: Thu, 29 Nov 2018 21:53:18 +0000 Subject: azure: detect vnet migration via netlink media change event Replace Azure pre-provision polling on IMDS with a blocking call which watches for netlink link state change messages. The media change event happens when a pre-provisioned VM has been activated and is connected to the users virtual network and cloud-init can then resume operation to complete image instantiation. --- cloudinit/sources/DataSourceAzure.py | 31 +- cloudinit/sources/helpers/netlink.py | 250 ++++++++++++++++ cloudinit/sources/helpers/tests/test_netlink.py | 373 ++++++++++++++++++++++++ tests/unittests/test_datasource/test_azure.py | 40 ++- 4 files changed, 678 insertions(+), 16 deletions(-) create mode 100644 cloudinit/sources/helpers/netlink.py create mode 100644 cloudinit/sources/helpers/tests/test_netlink.py (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index be82ec4d..e076d5dc 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -22,6 +22,7 @@ from cloudinit.event import EventType from cloudinit.net.dhcp import EphemeralDHCPv4 from cloudinit import sources from cloudinit.sources.helpers.azure import get_metadata_from_fabric +from cloudinit.sources.helpers import netlink from cloudinit.url_helper import UrlError, readurl, retry_on_url_exc from cloudinit import util @@ -409,6 +410,10 @@ class DataSourceAzure(sources.DataSource): perform_reprovision = reprovision or self._should_reprovision(ret) if perform_reprovision: + if util.is_FreeBSD(): + msg = "Free BSD is not supported for PPS VMs" + LOG.error(msg) + raise sources.InvalidMetaDataException(msg) ret = self._reprovision() imds_md = get_metadata_from_imds( self.fallback_interface, retries=3) @@ -523,8 +528,8 @@ class DataSourceAzure(sources.DataSource): response. Then return the returned JSON object.""" url = IMDS_URL + "reprovisiondata?api-version=2017-04-02" headers = {"Metadata": "true"} + nl_sock = None report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE)) - LOG.debug("Start polling IMDS") def exc_cb(msg, exception): if isinstance(exception, UrlError) and exception.code == 404: @@ -533,12 +538,19 @@ class DataSourceAzure(sources.DataSource): # call DHCP and setup the ephemeral network to acquire the new IP. return False + LOG.debug("Wait for vnetswitch to happen") while True: try: # Save our EphemeralDHCPv4 context so we avoid repeated dhcp self._ephemeral_dhcp_ctx = EphemeralDHCPv4() lease = self._ephemeral_dhcp_ctx.obtain_lease() if report_ready: + try: + nl_sock = netlink.create_bound_netlink_socket() + except netlink.NetlinkCreateSocketError as e: + LOG.warning(e) + self._ephemeral_dhcp_ctx.clean_network() + return path = REPORTED_READY_MARKER_FILE LOG.info( "Creating a marker file to report ready: %s", path) @@ -546,13 +558,24 @@ class DataSourceAzure(sources.DataSource): pid=os.getpid(), time=time())) self._report_ready(lease=lease) report_ready = False - return readurl(url, timeout=1, headers=headers, - exception_cb=exc_cb, infinite=True, - log_req_resp=False).contents + try: + netlink.wait_for_media_disconnect_connect( + nl_sock, lease['interface']) + except AssertionError as error: + LOG.error(error) + return + self._ephemeral_dhcp_ctx.clean_network() + else: + return readurl(url, timeout=1, headers=headers, + exception_cb=exc_cb, infinite=True, + log_req_resp=False).contents except UrlError: # Teardown our EphemeralDHCPv4 context on failure as we retry self._ephemeral_dhcp_ctx.clean_network() pass + finally: + if nl_sock: + nl_sock.close() def _report_ready(self, lease): """Tells the fabric provisioning has completed """ diff --git a/cloudinit/sources/helpers/netlink.py b/cloudinit/sources/helpers/netlink.py new file mode 100644 index 00000000..d377ae3d --- /dev/null +++ b/cloudinit/sources/helpers/netlink.py @@ -0,0 +1,250 @@ +# Author: Tamilmani Manoharan +# +# This file is part of cloud-init. See LICENSE file for license information. + +from cloudinit import log as logging +from cloudinit import util +from collections import namedtuple + +import os +import select +import socket +import struct + +LOG = logging.getLogger(__name__) + +# http://man7.org/linux/man-pages/man7/netlink.7.html +RTMGRP_LINK = 1 +NLMSG_NOOP = 1 +NLMSG_ERROR = 2 +NLMSG_DONE = 3 +RTM_NEWLINK = 16 +RTM_DELLINK = 17 +RTM_GETLINK = 18 +RTM_SETLINK = 19 +MAX_SIZE = 65535 +RTA_DATA_OFFSET = 32 +MSG_TYPE_OFFSET = 16 +SELECT_TIMEOUT = 60 + +NLMSGHDR_FMT = "IHHII" +IFINFOMSG_FMT = "BHiII" +NLMSGHDR_SIZE = struct.calcsize(NLMSGHDR_FMT) +IFINFOMSG_SIZE = struct.calcsize(IFINFOMSG_FMT) +RTATTR_START_OFFSET = NLMSGHDR_SIZE + IFINFOMSG_SIZE +RTA_DATA_START_OFFSET = 4 +PAD_ALIGNMENT = 4 + +IFLA_IFNAME = 3 +IFLA_OPERSTATE = 16 + +# https://www.kernel.org/doc/Documentation/networking/operstates.txt +OPER_UNKNOWN = 0 +OPER_NOTPRESENT = 1 +OPER_DOWN = 2 +OPER_LOWERLAYERDOWN = 3 +OPER_TESTING = 4 +OPER_DORMANT = 5 +OPER_UP = 6 + +RTAAttr = namedtuple('RTAAttr', ['length', 'rta_type', 'data']) +InterfaceOperstate = namedtuple('InterfaceOperstate', ['ifname', 'operstate']) +NetlinkHeader = namedtuple('NetlinkHeader', ['length', 'type', 'flags', 'seq', + 'pid']) + + +class NetlinkCreateSocketError(RuntimeError): + '''Raised if netlink socket fails during create or bind.''' + pass + + +def create_bound_netlink_socket(): + '''Creates netlink socket and bind on netlink group to catch interface + down/up events. The socket will bound only on RTMGRP_LINK (which only + includes RTM_NEWLINK/RTM_DELLINK/RTM_GETLINK events). The socket is set to + non-blocking mode since we're only receiving messages. + + :returns: netlink socket in non-blocking mode + :raises: NetlinkCreateSocketError + ''' + try: + netlink_socket = socket.socket(socket.AF_NETLINK, + socket.SOCK_RAW, + socket.NETLINK_ROUTE) + netlink_socket.bind((os.getpid(), RTMGRP_LINK)) + netlink_socket.setblocking(0) + except socket.error as e: + msg = "Exception during netlink socket create: %s" % e + raise NetlinkCreateSocketError(msg) + LOG.debug("Created netlink socket") + return netlink_socket + + +def get_netlink_msg_header(data): + '''Gets netlink message type and length + + :param: data read from netlink socket + :returns: netlink message type + :raises: AssertionError if data is None or data is not >= NLMSGHDR_SIZE + struct nlmsghdr { + __u32 nlmsg_len; /* Length of message including header */ + __u16 nlmsg_type; /* Type of message content */ + __u16 nlmsg_flags; /* Additional flags */ + __u32 nlmsg_seq; /* Sequence number */ + __u32 nlmsg_pid; /* Sender port ID */ + }; + ''' + assert (data is not None), ("data is none") + assert (len(data) >= NLMSGHDR_SIZE), ( + "data is smaller than netlink message header") + msg_len, msg_type, flags, seq, pid = struct.unpack(NLMSGHDR_FMT, + data[:MSG_TYPE_OFFSET]) + LOG.debug("Got netlink msg of type %d", msg_type) + return NetlinkHeader(msg_len, msg_type, flags, seq, pid) + + +def read_netlink_socket(netlink_socket, timeout=None): + '''Select and read from the netlink socket if ready. + + :param: netlink_socket: specify which socket object to read from + :param: timeout: specify a timeout value (integer) to wait while reading, + if none, it will block indefinitely until socket ready for read + :returns: string of data read (max length = ) from socket, + if no data read, returns None + :raises: AssertionError if netlink_socket is None + ''' + assert (netlink_socket is not None), ("netlink socket is none") + read_set, _, _ = select.select([netlink_socket], [], [], timeout) + # Incase of timeout,read_set doesn't contain netlink socket. + # just return from this function + if netlink_socket not in read_set: + return None + LOG.debug("netlink socket ready for read") + data = netlink_socket.recv(MAX_SIZE) + if data is None: + LOG.error("Reading from Netlink socket returned no data") + return data + + +def unpack_rta_attr(data, offset): + '''Unpack a single rta attribute. + + :param: data: string of data read from netlink socket + :param: offset: starting offset of RTA Attribute + :return: RTAAttr object with length, type and data. On error, return None. + :raises: AssertionError if data is None or offset is not integer. + ''' + assert (data is not None), ("data is none") + assert (type(offset) == int), ("offset is not integer") + assert (offset >= RTATTR_START_OFFSET), ( + "rta offset is less than expected length") + length = rta_type = 0 + attr_data = None + try: + length = struct.unpack_from("H", data, offset=offset)[0] + rta_type = struct.unpack_from("H", data, offset=offset+2)[0] + except struct.error: + return None # Should mean our offset is >= remaining data + + # Unpack just the attribute's data. Offset by 4 to skip length/type header + attr_data = data[offset+RTA_DATA_START_OFFSET:offset+length] + return RTAAttr(length, rta_type, attr_data) + + +def read_rta_oper_state(data): + '''Reads Interface name and operational state from RTA Data. + + :param: data: string of data read from netlink socket + :returns: InterfaceOperstate object containing if_name and oper_state. + None if data does not contain valid IFLA_OPERSTATE and + IFLA_IFNAME messages. + :raises: AssertionError if data is None or length of data is + smaller than RTATTR_START_OFFSET. + ''' + assert (data is not None), ("data is none") + assert (len(data) > RTATTR_START_OFFSET), ( + "length of data is smaller than RTATTR_START_OFFSET") + ifname = operstate = None + offset = RTATTR_START_OFFSET + while offset <= len(data): + attr = unpack_rta_attr(data, offset) + if not attr or attr.length == 0: + break + # Each attribute is 4-byte aligned. Determine pad length. + padlen = (PAD_ALIGNMENT - + (attr.length % PAD_ALIGNMENT)) % PAD_ALIGNMENT + offset += attr.length + padlen + + if attr.rta_type == IFLA_OPERSTATE: + operstate = ord(attr.data) + elif attr.rta_type == IFLA_IFNAME: + interface_name = util.decode_binary(attr.data, 'utf-8') + ifname = interface_name.strip('\0') + if not ifname or operstate is None: + return None + LOG.debug("rta attrs: ifname %s operstate %d", ifname, operstate) + return InterfaceOperstate(ifname, operstate) + + +def wait_for_media_disconnect_connect(netlink_socket, ifname): + '''Block until media disconnect and connect has happened on an interface. + Listens on netlink socket to receive netlink events and when the carrier + changes from 0 to 1, it considers event has happened and + return from this function + + :param: netlink_socket: netlink_socket to receive events + :param: ifname: Interface name to lookout for netlink events + :raises: AssertionError if netlink_socket is None or ifname is None. + ''' + assert (netlink_socket is not None), ("netlink socket is none") + assert (ifname is not None), ("interface name is none") + assert (len(ifname) > 0), ("interface name cannot be empty") + carrier = OPER_UP + prevCarrier = OPER_UP + data = bytes() + LOG.debug("Wait for media disconnect and reconnect to happen") + while True: + recv_data = read_netlink_socket(netlink_socket, SELECT_TIMEOUT) + if recv_data is None: + continue + LOG.debug('read %d bytes from socket', len(recv_data)) + data += recv_data + LOG.debug('Length of data after concat %d', len(data)) + offset = 0 + datalen = len(data) + while offset < datalen: + nl_msg = data[offset:] + if len(nl_msg) < NLMSGHDR_SIZE: + LOG.debug("Data is smaller than netlink header") + break + nlheader = get_netlink_msg_header(nl_msg) + if len(nl_msg) < nlheader.length: + LOG.debug("Partial data. Smaller than netlink message") + break + padlen = (nlheader.length+PAD_ALIGNMENT-1) & ~(PAD_ALIGNMENT-1) + offset = offset + padlen + LOG.debug('offset to next netlink message: %d', offset) + # Ignore any messages not new link or del link + if nlheader.type not in [RTM_NEWLINK, RTM_DELLINK]: + continue + interface_state = read_rta_oper_state(nl_msg) + if interface_state is None: + LOG.debug('Failed to read rta attributes: %s', interface_state) + continue + if interface_state.ifname != ifname: + LOG.debug( + "Ignored netlink event on interface %s. Waiting for %s.", + interface_state.ifname, ifname) + continue + if interface_state.operstate not in [OPER_UP, OPER_DOWN]: + continue + prevCarrier = carrier + carrier = interface_state.operstate + # check for carrier down, up sequence + isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP) + if isVnetSwitch: + LOG.debug("Media switch happened on %s.", ifname) + return + data = data[offset:] + +# vi: ts=4 expandtab diff --git a/cloudinit/sources/helpers/tests/test_netlink.py b/cloudinit/sources/helpers/tests/test_netlink.py new file mode 100644 index 00000000..c2898a16 --- /dev/null +++ b/cloudinit/sources/helpers/tests/test_netlink.py @@ -0,0 +1,373 @@ +# Author: Tamilmani Manoharan +# +# This file is part of cloud-init. See LICENSE file for license information. + +from cloudinit.tests.helpers import CiTestCase, mock +import socket +import struct +import codecs +from cloudinit.sources.helpers.netlink import ( + NetlinkCreateSocketError, create_bound_netlink_socket, read_netlink_socket, + read_rta_oper_state, unpack_rta_attr, wait_for_media_disconnect_connect, + OPER_DOWN, OPER_UP, OPER_DORMANT, OPER_LOWERLAYERDOWN, OPER_NOTPRESENT, + OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_SETLINK, + RTM_GETLINK, MAX_SIZE) + + +def int_to_bytes(i): + '''convert integer to binary: eg: 1 to \x01''' + hex_value = '{0:x}'.format(i) + hex_value = '0' * (len(hex_value) % 2) + hex_value + return codecs.decode(hex_value, 'hex_codec') + + +class TestCreateBoundNetlinkSocket(CiTestCase): + + @mock.patch('cloudinit.sources.helpers.netlink.socket.socket') + def test_socket_error_on_create(self, m_socket): + '''create_bound_netlink_socket catches socket creation exception''' + + """NetlinkCreateSocketError is raised when socket creation errors.""" + m_socket.side_effect = socket.error("Fake socket failure") + with self.assertRaises(NetlinkCreateSocketError) as ctx_mgr: + create_bound_netlink_socket() + self.assertEqual( + 'Exception during netlink socket create: Fake socket failure', + str(ctx_mgr.exception)) + + +class TestReadNetlinkSocket(CiTestCase): + + @mock.patch('cloudinit.sources.helpers.netlink.socket.socket') + @mock.patch('cloudinit.sources.helpers.netlink.select.select') + def test_read_netlink_socket(self, m_select, m_socket): + '''read_netlink_socket able to receive data''' + data = 'netlinktest' + m_select.return_value = [m_socket], None, None + m_socket.recv.return_value = data + recv_data = read_netlink_socket(m_socket, 2) + m_select.assert_called_with([m_socket], [], [], 2) + m_socket.recv.assert_called_with(MAX_SIZE) + self.assertIsNotNone(recv_data) + self.assertEqual(recv_data, data) + + @mock.patch('cloudinit.sources.helpers.netlink.socket.socket') + @mock.patch('cloudinit.sources.helpers.netlink.select.select') + def test_netlink_read_timeout(self, m_select, m_socket): + '''read_netlink_socket should timeout if nothing to read''' + m_select.return_value = [], None, None + data = read_netlink_socket(m_socket, 1) + m_select.assert_called_with([m_socket], [], [], 1) + self.assertEqual(m_socket.recv.call_count, 0) + self.assertIsNone(data) + + def test_read_invalid_socket(self): + '''read_netlink_socket raises assert error if socket is invalid''' + socket = None + with self.assertRaises(AssertionError) as context: + read_netlink_socket(socket, 1) + self.assertTrue('netlink socket is none' in str(context.exception)) + + +class TestParseNetlinkMessage(CiTestCase): + + def test_read_rta_oper_state(self): + '''read_rta_oper_state could parse netlink message and extract data''' + ifname = "eth0" + bytes = ifname.encode("utf-8") + buf = bytearray(48) + struct.pack_into("HH4sHHc", buf, RTATTR_START_OFFSET, 8, 3, bytes, 5, + 16, int_to_bytes(OPER_DOWN)) + interface_state = read_rta_oper_state(buf) + self.assertEqual(interface_state.ifname, ifname) + self.assertEqual(interface_state.operstate, OPER_DOWN) + + def test_read_none_data(self): + '''read_rta_oper_state raises assert error if data is none''' + data = None + with self.assertRaises(AssertionError) as context: + read_rta_oper_state(data) + self.assertTrue('data is none', str(context.exception)) + + def test_read_invalid_rta_operstate_none(self): + '''read_rta_oper_state returns none if operstate is none''' + ifname = "eth0" + buf = bytearray(40) + bytes = ifname.encode("utf-8") + struct.pack_into("HH4s", buf, RTATTR_START_OFFSET, 8, 3, bytes) + interface_state = read_rta_oper_state(buf) + self.assertIsNone(interface_state) + + def test_read_invalid_rta_ifname_none(self): + '''read_rta_oper_state returns none if ifname is none''' + buf = bytearray(40) + struct.pack_into("HHc", buf, RTATTR_START_OFFSET, 5, 16, + int_to_bytes(OPER_DOWN)) + interface_state = read_rta_oper_state(buf) + self.assertIsNone(interface_state) + + def test_read_invalid_data_len(self): + '''raise assert error if data size is smaller than required size''' + buf = bytearray(32) + with self.assertRaises(AssertionError) as context: + read_rta_oper_state(buf) + self.assertTrue('length of data is smaller than RTATTR_START_OFFSET' in + str(context.exception)) + + def test_unpack_rta_attr_none_data(self): + '''unpack_rta_attr raises assert error if data is none''' + data = None + with self.assertRaises(AssertionError) as context: + unpack_rta_attr(data, RTATTR_START_OFFSET) + self.assertTrue('data is none' in str(context.exception)) + + def test_unpack_rta_attr_invalid_offset(self): + '''unpack_rta_attr raises assert error if offset is invalid''' + data = bytearray(48) + with self.assertRaises(AssertionError) as context: + unpack_rta_attr(data, "offset") + self.assertTrue('offset is not integer' in str(context.exception)) + with self.assertRaises(AssertionError) as context: + unpack_rta_attr(data, 31) + self.assertTrue('rta offset is less than expected length' in + str(context.exception)) + + +@mock.patch('cloudinit.sources.helpers.netlink.socket.socket') +@mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket') +class TestWaitForMediaDisconnectConnect(CiTestCase): + with_logs = True + + def _media_switch_data(self, ifname, msg_type, operstate): + '''construct netlink data with specified fields''' + if ifname and operstate is not None: + data = bytearray(48) + bytes = ifname.encode("utf-8") + struct.pack_into("HH4sHHc", data, RTATTR_START_OFFSET, 8, 3, + bytes, 5, 16, int_to_bytes(operstate)) + elif ifname: + data = bytearray(40) + bytes = ifname.encode("utf-8") + struct.pack_into("HH4s", data, RTATTR_START_OFFSET, 8, 3, bytes) + elif operstate: + data = bytearray(40) + struct.pack_into("HHc", data, RTATTR_START_OFFSET, 5, 16, + int_to_bytes(operstate)) + struct.pack_into("=LHHLL", data, 0, len(data), msg_type, 0, 0, 0) + return data + + def test_media_down_up_scenario(self, m_read_netlink_socket, + m_socket): + '''Test for media down up sequence for required interface name''' + ifname = "eth0" + # construct data for Oper State down + data_op_down = self._media_switch_data(ifname, RTM_NEWLINK, OPER_DOWN) + # construct data for Oper State up + data_op_up = self._media_switch_data(ifname, RTM_NEWLINK, OPER_UP) + m_read_netlink_socket.side_effect = [data_op_down, data_op_up] + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertEqual(m_read_netlink_socket.call_count, 2) + + def test_wait_for_media_switch_diff_interface(self, m_read_netlink_socket, + m_socket): + '''wait_for_media_disconnect_connect ignores unexpected interfaces. + + The first two messages are for other interfaces and last two are for + expected interface. So the function exit only after receiving last + 2 messages and therefore the call count for m_read_netlink_socket + has to be 4 + ''' + other_ifname = "eth1" + expected_ifname = "eth0" + data_op_down_eth1 = self._media_switch_data( + other_ifname, RTM_NEWLINK, OPER_DOWN) + data_op_up_eth1 = self._media_switch_data( + other_ifname, RTM_NEWLINK, OPER_UP) + data_op_down_eth0 = self._media_switch_data( + expected_ifname, RTM_NEWLINK, OPER_DOWN) + data_op_up_eth0 = self._media_switch_data( + expected_ifname, RTM_NEWLINK, OPER_UP) + m_read_netlink_socket.side_effect = [data_op_down_eth1, + data_op_up_eth1, + data_op_down_eth0, + data_op_up_eth0] + wait_for_media_disconnect_connect(m_socket, expected_ifname) + self.assertIn('Ignored netlink event on interface %s' % other_ifname, + self.logs.getvalue()) + self.assertEqual(m_read_netlink_socket.call_count, 4) + + def test_invalid_msgtype_getlink(self, m_read_netlink_socket, m_socket): + '''wait_for_media_disconnect_connect ignores GETLINK events. + + The first two messages are for oper down and up for RTM_GETLINK type + which netlink module will ignore. The last 2 messages are RTM_NEWLINK + with oper state down and up messages. Therefore the call count for + m_read_netlink_socket has to be 4 ignoring first 2 messages + of RTM_GETLINK + ''' + ifname = "eth0" + data_getlink_down = self._media_switch_data( + ifname, RTM_GETLINK, OPER_DOWN) + data_getlink_up = self._media_switch_data( + ifname, RTM_GETLINK, OPER_UP) + data_newlink_down = self._media_switch_data( + ifname, RTM_NEWLINK, OPER_DOWN) + data_newlink_up = self._media_switch_data( + ifname, RTM_NEWLINK, OPER_UP) + m_read_netlink_socket.side_effect = [data_getlink_down, + data_getlink_up, + data_newlink_down, + data_newlink_up] + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertEqual(m_read_netlink_socket.call_count, 4) + + def test_invalid_msgtype_setlink(self, m_read_netlink_socket, m_socket): + '''wait_for_media_disconnect_connect ignores SETLINK events. + + The first two messages are for oper down and up for RTM_GETLINK type + which it will ignore. 3rd and 4th messages are RTM_NEWLINK with down + and up messages. This function should exit after 4th messages since it + sees down->up scenario. So the call count for m_read_netlink_socket + has to be 4 ignoring first 2 messages of RTM_GETLINK and + last 2 messages of RTM_NEWLINK + ''' + ifname = "eth0" + data_setlink_down = self._media_switch_data( + ifname, RTM_SETLINK, OPER_DOWN) + data_setlink_up = self._media_switch_data( + ifname, RTM_SETLINK, OPER_UP) + data_newlink_down = self._media_switch_data( + ifname, RTM_NEWLINK, OPER_DOWN) + data_newlink_up = self._media_switch_data( + ifname, RTM_NEWLINK, OPER_UP) + m_read_netlink_socket.side_effect = [data_setlink_down, + data_setlink_up, + data_newlink_down, + data_newlink_up, + data_newlink_down, + data_newlink_up] + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertEqual(m_read_netlink_socket.call_count, 4) + + def test_netlink_invalid_switch_scenario(self, m_read_netlink_socket, + m_socket): + '''returns only if it receives UP event after a DOWN event''' + ifname = "eth0" + data_op_down = self._media_switch_data(ifname, RTM_NEWLINK, OPER_DOWN) + data_op_up = self._media_switch_data(ifname, RTM_NEWLINK, OPER_UP) + data_op_dormant = self._media_switch_data(ifname, RTM_NEWLINK, + OPER_DORMANT) + data_op_notpresent = self._media_switch_data(ifname, RTM_NEWLINK, + OPER_NOTPRESENT) + data_op_lowerdown = self._media_switch_data(ifname, RTM_NEWLINK, + OPER_LOWERLAYERDOWN) + data_op_testing = self._media_switch_data(ifname, RTM_NEWLINK, + OPER_TESTING) + data_op_unknown = self._media_switch_data(ifname, RTM_NEWLINK, + OPER_UNKNOWN) + m_read_netlink_socket.side_effect = [data_op_up, data_op_up, + data_op_dormant, data_op_up, + data_op_notpresent, data_op_up, + data_op_lowerdown, data_op_up, + data_op_testing, data_op_up, + data_op_unknown, data_op_up, + data_op_down, data_op_up] + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertEqual(m_read_netlink_socket.call_count, 14) + + def test_netlink_valid_inbetween_transitions(self, m_read_netlink_socket, + m_socket): + '''wait_for_media_disconnect_connect handles in between transitions''' + ifname = "eth0" + data_op_down = self._media_switch_data(ifname, RTM_NEWLINK, OPER_DOWN) + data_op_up = self._media_switch_data(ifname, RTM_NEWLINK, OPER_UP) + data_op_dormant = self._media_switch_data(ifname, RTM_NEWLINK, + OPER_DORMANT) + data_op_unknown = self._media_switch_data(ifname, RTM_NEWLINK, + OPER_UNKNOWN) + m_read_netlink_socket.side_effect = [data_op_down, data_op_dormant, + data_op_unknown, data_op_up] + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertEqual(m_read_netlink_socket.call_count, 4) + + def test_netlink_invalid_operstate(self, m_read_netlink_socket, m_socket): + '''wait_for_media_disconnect_connect should handle invalid operstates. + + The function should not fail and return even if it receives invalid + operstates. It always should wait for down up sequence. + ''' + ifname = "eth0" + data_op_down = self._media_switch_data(ifname, RTM_NEWLINK, OPER_DOWN) + data_op_up = self._media_switch_data(ifname, RTM_NEWLINK, OPER_UP) + data_op_invalid = self._media_switch_data(ifname, RTM_NEWLINK, 7) + m_read_netlink_socket.side_effect = [data_op_invalid, data_op_up, + data_op_down, data_op_invalid, + data_op_up] + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertEqual(m_read_netlink_socket.call_count, 5) + + def test_wait_invalid_socket(self, m_read_netlink_socket, m_socket): + '''wait_for_media_disconnect_connect handle none netlink socket.''' + socket = None + ifname = "eth0" + with self.assertRaises(AssertionError) as context: + wait_for_media_disconnect_connect(socket, ifname) + self.assertTrue('netlink socket is none' in str(context.exception)) + + def test_wait_invalid_ifname(self, m_read_netlink_socket, m_socket): + '''wait_for_media_disconnect_connect handle none interface name''' + ifname = None + with self.assertRaises(AssertionError) as context: + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertTrue('interface name is none' in str(context.exception)) + ifname = "" + with self.assertRaises(AssertionError) as context: + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertTrue('interface name cannot be empty' in + str(context.exception)) + + def test_wait_invalid_rta_attr(self, m_read_netlink_socket, m_socket): + ''' wait_for_media_disconnect_connect handles invalid rta data''' + ifname = "eth0" + data_invalid1 = self._media_switch_data(None, RTM_NEWLINK, OPER_DOWN) + data_invalid2 = self._media_switch_data(ifname, RTM_NEWLINK, None) + data_op_down = self._media_switch_data(ifname, RTM_NEWLINK, OPER_DOWN) + data_op_up = self._media_switch_data(ifname, RTM_NEWLINK, OPER_UP) + m_read_netlink_socket.side_effect = [data_invalid1, data_invalid2, + data_op_down, data_op_up] + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertEqual(m_read_netlink_socket.call_count, 4) + + def test_read_multiple_netlink_msgs(self, m_read_netlink_socket, m_socket): + '''Read multiple messages in single receive call''' + ifname = "eth0" + bytes = ifname.encode("utf-8") + data = bytearray(96) + struct.pack_into("=LHHLL", data, 0, 48, RTM_NEWLINK, 0, 0, 0) + struct.pack_into("HH4sHHc", data, RTATTR_START_OFFSET, 8, 3, + bytes, 5, 16, int_to_bytes(OPER_DOWN)) + struct.pack_into("=LHHLL", data, 48, 48, RTM_NEWLINK, 0, 0, 0) + struct.pack_into("HH4sHHc", data, 48 + RTATTR_START_OFFSET, 8, + 3, bytes, 5, 16, int_to_bytes(OPER_UP)) + m_read_netlink_socket.return_value = data + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertEqual(m_read_netlink_socket.call_count, 1) + + def test_read_partial_netlink_msgs(self, m_read_netlink_socket, m_socket): + '''Read partial messages in receive call''' + ifname = "eth0" + bytes = ifname.encode("utf-8") + data1 = bytearray(112) + data2 = bytearray(32) + struct.pack_into("=LHHLL", data1, 0, 48, RTM_NEWLINK, 0, 0, 0) + struct.pack_into("HH4sHHc", data1, RTATTR_START_OFFSET, 8, 3, + bytes, 5, 16, int_to_bytes(OPER_DOWN)) + struct.pack_into("=LHHLL", data1, 48, 48, RTM_NEWLINK, 0, 0, 0) + struct.pack_into("HH4sHHc", data1, 80, 8, 3, bytes, 5, 16, + int_to_bytes(OPER_DOWN)) + struct.pack_into("=LHHLL", data1, 96, 48, RTM_NEWLINK, 0, 0, 0) + struct.pack_into("HH4sHHc", data2, 16, 8, 3, bytes, 5, 16, + int_to_bytes(OPER_UP)) + m_read_netlink_socket.side_effect = [data1, data2] + wait_for_media_disconnect_connect(m_socket, ifname) + self.assertEqual(m_read_netlink_socket.call_count, 2) diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 5ea7ae5e..417d86a9 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -564,6 +564,8 @@ fdescfs /dev/fd fdescfs rw 0 0 self.assertEqual(1, report_ready_func.call_count) @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file') + @mock.patch('cloudinit.sources.helpers.netlink.' + 'wait_for_media_disconnect_connect') @mock.patch( 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready') @mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network') @@ -572,7 +574,7 @@ fdescfs /dev/fd fdescfs rw 0 0 def test_crawl_metadata_on_reprovision_reports_ready_using_lease( self, m_readurl, m_dhcp, m_net, report_ready_func, - m_write): + m_media_switch, m_write): """If reprovisioning, report ready using the obtained lease""" ovfenv = construct_valid_ovf_env( platform_settings={"PreprovisionedVm": "True"}) @@ -586,6 +588,7 @@ fdescfs /dev/fd fdescfs rw 0 0 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', 'unknown-245': '624c3620'} m_dhcp.return_value = [lease] + m_media_switch.return_value = None reprovision_ovfenv = construct_valid_ovf_env() m_readurl.return_value = url_helper.StringResponse( @@ -1676,6 +1679,8 @@ class TestPreprovisioningShouldReprovision(CiTestCase): @mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network') @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') +@mock.patch('cloudinit.sources.helpers.netlink.' + 'wait_for_media_disconnect_connect') @mock.patch('requests.Session.request') @mock.patch(MOCKPATH + 'DataSourceAzure._report_ready') class TestPreprovisioningPollIMDS(CiTestCase): @@ -1689,7 +1694,8 @@ class TestPreprovisioningPollIMDS(CiTestCase): @mock.patch(MOCKPATH + 'EphemeralDHCPv4') def test_poll_imds_re_dhcp_on_timeout(self, m_dhcpv4, report_ready_func, - fake_resp, m_dhcp, m_net): + fake_resp, m_media_switch, m_dhcp, + m_net): """The poll_imds will retry DHCP on IMDS timeout.""" report_file = self.tmp_path('report_marker', self.tmp) lease = { @@ -1697,7 +1703,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', 'unknown-245': '624c3620'} m_dhcp.return_value = [lease] - + m_media_switch.return_value = None dhcp_ctx = mock.MagicMock(lease=lease) dhcp_ctx.obtain_lease.return_value = lease m_dhcpv4.return_value = dhcp_ctx @@ -1723,11 +1729,12 @@ class TestPreprovisioningPollIMDS(CiTestCase): dsa._poll_imds() self.assertEqual(report_ready_func.call_count, 1) report_ready_func.assert_called_with(lease=lease) - self.assertEqual(2, m_dhcpv4.call_count, 'Expected 2 DHCP calls') + self.assertEqual(3, m_dhcpv4.call_count, 'Expected 3 DHCP calls') self.assertEqual(3, self.tries, 'Expected 3 total reads from IMDS') - def test_poll_imds_report_ready_false(self, report_ready_func, - fake_resp, m_dhcp, m_net): + def test_poll_imds_report_ready_false(self, + report_ready_func, fake_resp, + m_media_switch, m_dhcp, m_net): """The poll_imds should not call reporting ready when flag is false""" report_file = self.tmp_path('report_marker', self.tmp) @@ -1736,6 +1743,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): 'interface': 'eth9', 'fixed-address': '192.168.2.9', 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', 'unknown-245': '624c3620'}] + m_media_switch.return_value = None dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file): dsa._poll_imds() @@ -1745,6 +1753,8 @@ class TestPreprovisioningPollIMDS(CiTestCase): @mock.patch(MOCKPATH + 'util.subp') @mock.patch(MOCKPATH + 'util.write_file') @mock.patch(MOCKPATH + 'util.is_FreeBSD') +@mock.patch('cloudinit.sources.helpers.netlink.' + 'wait_for_media_disconnect_connect') @mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network') @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') @mock.patch('requests.Session.request') @@ -1757,10 +1767,13 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): self.paths = helpers.Paths({'cloud_dir': tmp}) dsaz.BUILTIN_DS_CONFIG['data_dir'] = self.waagent_d - def test_poll_imds_returns_ovf_env(self, fake_resp, m_dhcp, m_net, + def test_poll_imds_returns_ovf_env(self, fake_resp, + m_dhcp, m_net, + m_media_switch, m_is_bsd, write_f, subp): """The _poll_imds method should return the ovf_env.xml.""" m_is_bsd.return_value = False + m_media_switch.return_value = None m_dhcp.return_value = [{ 'interface': 'eth9', 'fixed-address': '192.168.2.9', 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0'}] @@ -1778,16 +1791,19 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): 'Cloud-Init/%s' % vs() }, method='GET', timeout=1, url=full_url)]) - self.assertEqual(m_dhcp.call_count, 1) + self.assertEqual(m_dhcp.call_count, 2) m_net.assert_any_call( broadcast='192.168.2.255', interface='eth9', ip='192.168.2.9', prefix_or_mask='255.255.255.0', router='192.168.2.1') - self.assertEqual(m_net.call_count, 1) + self.assertEqual(m_net.call_count, 2) - def test__reprovision_calls__poll_imds(self, fake_resp, m_dhcp, m_net, + def test__reprovision_calls__poll_imds(self, fake_resp, + m_dhcp, m_net, + m_media_switch, m_is_bsd, write_f, subp): """The _reprovision method should call poll IMDS.""" m_is_bsd.return_value = False + m_media_switch.return_value = None m_dhcp.return_value = [{ 'interface': 'eth9', 'fixed-address': '192.168.2.9', 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0', @@ -1811,11 +1827,11 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): 'User-Agent': 'Cloud-Init/%s' % vs()}, method='GET', timeout=1, url=full_url)]) - self.assertEqual(m_dhcp.call_count, 1) + self.assertEqual(m_dhcp.call_count, 2) m_net.assert_any_call( broadcast='192.168.2.255', interface='eth9', ip='192.168.2.9', prefix_or_mask='255.255.255.0', router='192.168.2.1') - self.assertEqual(m_net.call_count, 1) + self.assertEqual(m_net.call_count, 2) class TestRemoveUbuntuNetworkConfigScripts(CiTestCase): -- cgit v1.2.3