diff options
author | Chris Patterson <cpatterson@microsoft.com> | 2022-02-04 15:16:21 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-04 14:16:21 -0600 |
commit | 826783d83f55b78336cdb9e16ea39a1038618b03 (patch) | |
tree | 6186ddc388bdc367794dcc35ca23b7379ee1b6a2 | |
parent | 75a5c8f2dcb28167ef6ce6941a498ae4b8d2d0ac (diff) | |
download | vyos-cloud-init-826783d83f55b78336cdb9e16ea39a1038618b03.tar.gz vyos-cloud-init-826783d83f55b78336cdb9e16ea39a1038618b03.zip |
sources/azure: consolidate ephemeral DHCP configuration (#1229)
Introduce:
- _setup_ephemeral_networking() to bring up networking.
If no iface is specified, it will use net.find_fallback_nic()
which is consistent with the previous usage of fallback_interface.
This method now tracks the encoded address of the wireserver
with a new property `_wireserver_endpoint`. Introduce a
timeout parameter to allow for retrying for a specified amount
of time.
- _teardown_ephemeral_networking() to bring down networking.
- _is_ephemeral_networking_up() to check status.
Ephemeral networking is now:
- Brought up prior to checking IMDS.
- Torn down following metadata crawl.
- For Savable PPS, torn down prior to waiting for NIC detach.
The link must be torn down in advance or we will see errors
from cleaning up network after the interface is unplugged.
- For Running PPS, torn down after waiting for media switch.
The link must be up for media switch to be detected.
- For all PPS, after network switch is complete, networking is
brought back up to poll for reprovision data and report ready.
It will be torn down after metadata crawl is complete like
non-PPS paths.
Additionally:
- Remove EphemeralDHCPv4WithReporting variant in favor of directly
using EphemeralDHCPv4. The reporting was only for __enter__ usage
which is no longer a used path. Continue to use dhcp_log_cb
callback.
Signed-off-by: Chris Patterson <cpatterson@microsoft.com>
-rwxr-xr-x | cloudinit/sources/DataSourceAzure.py | 313 | ||||
-rwxr-xr-x | cloudinit/sources/helpers/azure.py | 19 | ||||
-rw-r--r-- | tests/unittests/sources/test_azure.py | 234 |
3 files changed, 251 insertions, 315 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 222f99f2..44efd358 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -6,6 +6,7 @@ import base64 import crypt +import datetime import os import os.path import re @@ -24,15 +25,16 @@ from cloudinit import log as logging from cloudinit import net, sources, ssh_util, subp, util from cloudinit.event import EventScope, EventType from cloudinit.net import device_driver -from cloudinit.net.dhcp import NoDHCPLeaseError +from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError from cloudinit.reporting import events from cloudinit.sources.helpers import netlink from cloudinit.sources.helpers.azure import ( DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE, - EphemeralDHCPv4WithReporting, + DEFAULT_WIRESERVER_ENDPOINT, azure_ds_reporter, azure_ds_telemetry_reporter, build_minimal_ovf, + dhcp_log_cb, get_boot_telemetry, get_metadata_from_fabric, get_system_info, @@ -303,6 +305,7 @@ class DataSourceAzure(sources.DataSource): self.dhclient_lease_file = self.ds_cfg.get("dhclient_lease_file") self._network_config = None self._ephemeral_dhcp_ctx = None + self._wireserver_endpoint = DEFAULT_WIRESERVER_ENDPOINT self.iso_dev = None def _unpickle(self, ci_pkl_version: int) -> None: @@ -311,6 +314,7 @@ class DataSourceAzure(sources.DataSource): self._ephemeral_dhcp_ctx = None if not hasattr(self, "iso_dev"): self.iso_dev = None + self._wireserver_endpoint = DEFAULT_WIRESERVER_ENDPOINT def __str__(self): root = sources.DataSource.__str__(self) @@ -327,6 +331,76 @@ class DataSourceAzure(sources.DataSource): return "%s (%s)" % (subplatform_type, self.seed) @azure_ds_telemetry_reporter + def _setup_ephemeral_networking( + self, *, iface: Optional[str] = None, timeout_minutes: int = 5 + ) -> None: + """Setup ephemeral networking. + + Keep retrying DHCP up to specified number of minutes. This does + not kill dhclient, so the timeout in practice may be up to + timeout_minutes + the system-configured timeout for dhclient. + + :param timeout_minutes: Number of minutes to keep retrying for. + + :raises NoDHCPLeaseError: If unable to obtain DHCP lease. + """ + if self._ephemeral_dhcp_ctx is not None: + raise RuntimeError( + "Bringing up networking when already configured." + ) + + LOG.debug("Requested ephemeral networking (iface=%s)", iface) + + start = datetime.datetime.utcnow() + timeout = start + datetime.timedelta(minutes=timeout_minutes) + + self._ephemeral_dhcp_ctx = EphemeralDHCPv4( + iface=iface, dhcp_log_func=dhcp_log_cb + ) + + lease = None + with events.ReportEventStack( + name="obtain-dhcp-lease", + description="obtain dhcp lease", + parent=azure_ds_reporter, + ): + while datetime.datetime.utcnow() < timeout: + try: + lease = self._ephemeral_dhcp_ctx.obtain_lease() + break + except NoDHCPLeaseError: + continue + + if lease is None: + msg = "Failed to obtain DHCP lease (iface=%s)" % iface + report_diagnostic_event(msg, logger_func=LOG.error) + self._ephemeral_dhcp_ctx = None + raise NoDHCPLeaseError() + else: + # Ensure iface is set. + self._ephemeral_dhcp_ctx.iface = lease["interface"] + + # Update wireserver IP from DHCP options. + if "unknown-245" in lease: + self._wireserver_endpoint = lease["unknown-245"] + + @azure_ds_telemetry_reporter + def _teardown_ephemeral_networking(self) -> None: + """Teardown ephemeral networking.""" + if self._ephemeral_dhcp_ctx is None: + return + + self._ephemeral_dhcp_ctx.clean_network() + self._ephemeral_dhcp_ctx = None + + def _is_ephemeral_networking_up(self) -> bool: + """Check if networking is configured.""" + return not ( + self._ephemeral_dhcp_ctx is None + or self._ephemeral_dhcp_ctx.lease is None + ) + + @azure_ds_telemetry_reporter def crawl_metadata(self): """Walk all instance metadata sources returning a dict on success. @@ -350,6 +424,8 @@ class DataSourceAzure(sources.DataSource): userdata_raw = "" cfg = {} files = {} + + iso_dev = None if os.path.isfile(REPROVISION_MARKER_FILE): metadata_source = "IMDS" report_diagnostic_event( @@ -370,7 +446,7 @@ class DataSourceAzure(sources.DataSource): src, load_azure_ds_dir ) # save the device for ejection later - self.iso_dev = src + iso_dev = src else: md, userdata_raw, cfg, files = load_azure_ds_dir(src) ovf_is_accessible = True @@ -400,19 +476,31 @@ class DataSourceAzure(sources.DataSource): logger_func=LOG.debug, ) - imds_md = self.get_imds_data_with_api_fallback( - self.fallback_interface, retries=10 - ) + # If we read OVF from attached media, we are provisioning. If OVF + # is not found, we are probably provisioning on a system which does + # not have UDF support. In either case, require IMDS metadata. + # If we require IMDS metadata, try harder to obtain networking, waiting + # for at least 20 minutes. Otherwise only wait 5 minutes. + requires_imds_metadata = bool(iso_dev) or not ovf_is_accessible + timeout_minutes = 5 if requires_imds_metadata else 20 + try: + self._setup_ephemeral_networking(timeout_minutes=timeout_minutes) + except NoDHCPLeaseError: + pass - # reset _fallback_interface so that if the code enters reprovisioning - # flow, it will force re-evaluation of new fallback nic. - self._fallback_interface = None + if self._is_ephemeral_networking_up(): + imds_md = self.get_imds_data_with_api_fallback(retries=10) + else: + imds_md = {} if not imds_md and not ovf_is_accessible: msg = "No OVF or IMDS available" report_diagnostic_event(msg) raise sources.InvalidMetaDataException(msg) + self.iso_dev = iso_dev + + # Refresh PPS type using metadata. pps_type = self._determine_pps_type(cfg, imds_md) if pps_type != PPSType.NONE: if util.is_FreeBSD(): @@ -427,9 +515,7 @@ class DataSourceAzure(sources.DataSource): md, userdata_raw, cfg, files = self._reprovision() # fetch metadata again as it has changed after reprovisioning - imds_md = self.get_imds_data_with_api_fallback( - self.fallback_interface, retries=10 - ) + imds_md = self.get_imds_data_with_api_fallback(retries=10) self.seed = metadata_source crawled_data.update( @@ -509,26 +595,8 @@ class DataSourceAzure(sources.DataSource): if pps_type != PPSType.NONE: LOG.info("Reporting ready to Azure after getting ReprovisionData") - use_cached_ephemeral = ( - self.distro.networking.is_up(self.fallback_interface) - and self._ephemeral_dhcp_ctx - and self._ephemeral_dhcp_ctx.lease - ) - if use_cached_ephemeral: - self._report_ready(lease=self._ephemeral_dhcp_ctx.lease) - self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral - else: - try: - with EphemeralDHCPv4WithReporting( - azure_ds_reporter - ) as lease: - self._report_ready(lease=lease) - except Exception as e: - report_diagnostic_event( - "exception while reporting ready: %s" % e, - logger_func=LOG.error, - ) - raise + self._report_ready() + return crawled_data def _is_platform_viable(self): @@ -575,6 +643,8 @@ class DataSourceAzure(sources.DataSource): description=DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE ) return False + finally: + self._teardown_ephemeral_networking() if ( self.distro @@ -626,7 +696,7 @@ class DataSourceAzure(sources.DataSource): @azure_ds_telemetry_reporter def get_imds_data_with_api_fallback( self, - fallback_nic, + *, retries, md_type=MetadataType.ALL, exc_cb=retry_on_url_exc, @@ -643,7 +713,6 @@ class DataSourceAzure(sources.DataSource): try: LOG.info("Attempting IMDS api-version: %s", IMDS_VER_WANT) return get_metadata_from_imds( - fallback_nic=fallback_nic, retries=0, md_type=md_type, api_version=IMDS_VER_WANT, @@ -660,7 +729,6 @@ class DataSourceAzure(sources.DataSource): LOG.info("Using IMDS api-version: %s", IMDS_VER_MIN) return get_metadata_from_imds( - fallback_nic=fallback_nic, retries=retries, md_type=md_type, api_version=IMDS_VER_MIN, @@ -891,12 +959,12 @@ class DataSourceAzure(sources.DataSource): ) @azure_ds_telemetry_reporter - def _report_ready_for_pps(self, lease: dict) -> None: + def _report_ready_for_pps(self) -> None: """Report ready for PPS, creating the marker file upon completion. :raises sources.InvalidMetaDataException: On error reporting ready. """ - report_ready_succeeded = self._report_ready(lease=lease) + report_ready_succeeded = self._report_ready() if not report_ready_succeeded: msg = "Failed reporting ready while in the preprovisioning pool." report_diagnostic_event(msg, logger_func=LOG.error) @@ -919,22 +987,8 @@ class DataSourceAzure(sources.DataSource): # For now, only a VM's primary NIC can contact IMDS and WireServer. If # DHCP fails for a NIC, we have no mechanism to determine if the NIC is - # primary or secondary. In this case, the desired behavior is to fail - # VM provisioning if there is any DHCP failure when trying to determine - # the primary NIC. - try: - dhcp_ctx = EphemeralDHCPv4WithReporting( - azure_ds_reporter, iface=ifname - ) - dhcp_ctx.obtain_lease() - except Exception as e: - report_diagnostic_event( - "Giving up. Failed to obtain dhcp lease " - "for %s when attempting to determine " - "primary NIC during reprovision due to %s" % (ifname, e), - logger_func=LOG.error, - ) - raise + # primary or secondary. In this case, retry DHCP until successful. + self._setup_ephemeral_networking(iface=ifname, timeout_minutes=20) # Retry polling network metadata for a limited duration only when the # calls fail due to network unreachable error or timeout. @@ -982,7 +1036,10 @@ class DataSourceAzure(sources.DataSource): # could add several seconds of delay. try: imds_md = self.get_imds_data_with_api_fallback( - ifname, 0, MetadataType.NETWORK, network_metadata_exc_cb, True + retries=0, + md_type=MetadataType.NETWORK, + exc_cb=network_metadata_exc_cb, + infinite=True, ) except Exception as e: LOG.warning( @@ -992,25 +1049,21 @@ class DataSourceAzure(sources.DataSource): ifname, e, ) - finally: - # If we are not the primary nic, then clean the dhcp context. - if not imds_md: - dhcp_ctx.clean_network() if imds_md: # Only primary NIC will get a response from IMDS. LOG.info("%s is the primary nic", ifname) is_primary = True - # If primary, set ephemeral dhcp ctx so we can report ready - self._ephemeral_dhcp_ctx = dhcp_ctx - # Set the expected nic count based on the response received. expected_nic_count = len(imds_md["interface"]) report_diagnostic_event( "Expected nic count: %d" % expected_nic_count, logger_func=LOG.info, ) + else: + # If we are not the primary nic, then clean the dhcp context. + self._teardown_ephemeral_networking() return is_primary, expected_nic_count @@ -1097,19 +1150,7 @@ class DataSourceAzure(sources.DataSource): # The nic of the preprovisioned vm gets hot-detached as soon as # we report ready. So no need to save the dhcp context. if not os.path.isfile(REPORTED_READY_MARKER_FILE): - try: - with EphemeralDHCPv4WithReporting( - azure_ds_reporter - ) as lease: - self._report_ready_for_pps(lease) - except NoDHCPLeaseError as error: - report_diagnostic_event( - "DHCP failed while in provisioning pool", - logger_func=LOG.warning, - ) - raise sources.InvalidMetaDataException( - "Failed to report ready while in provisioning pool." - ) from error + self._report_ready_for_pps() has_nic_been_detached = bool( os.path.isfile(REPROVISION_NIC_DETACHED_MARKER_FILE) @@ -1117,6 +1158,7 @@ class DataSourceAzure(sources.DataSource): if not has_nic_been_detached: LOG.info("NIC has not been detached yet.") + self._teardown_ephemeral_networking() self._wait_for_nic_detach(nl_sock) # If we know that the preprovisioned nic has been detached, and we @@ -1193,31 +1235,23 @@ class DataSourceAzure(sources.DataSource): ) return False - # When the interface is hot-attached, we would have already - # done dhcp and set the dhcp context. In that case, skip - # the attempt to do dhcp. - msg = ( - "Unexpected error. Dhcp context is not expected to be already " - "set when we need to wait for vnet switch" - ) - if self._ephemeral_dhcp_ctx is not None and report_ready: - report_diagnostic_event(msg, logger_func=LOG.error) - raise RuntimeError(msg) - if report_ready: + # Networking must be up for netlink to detect + # media disconnect/connect. It may be down to due + # initial DHCP failure, if so check for it and retry, + # ensuring we flag it as required. + if not self._is_ephemeral_networking_up(): + self._setup_ephemeral_networking(timeout_minutes=20) + try: - self._ephemeral_dhcp_ctx = EphemeralDHCPv4WithReporting( - azure_ds_reporter - ) - lease = self._ephemeral_dhcp_ctx.obtain_lease() + iface = self._ephemeral_dhcp_ctx.iface + nl_sock = netlink.create_bound_netlink_socket() - self._report_ready_for_pps(lease) + self._report_ready_for_pps() - # Networking must remain up for netlink to detect - # media disconnect/connect. LOG.debug( "Wait for vnetswitch to happen on %s", - lease["interface"], + iface, ) with events.ReportEventStack( name="wait-for-media-disconnect-connect", @@ -1226,7 +1260,7 @@ class DataSourceAzure(sources.DataSource): ): try: netlink.wait_for_media_disconnect_connect( - nl_sock, lease["interface"] + nl_sock, iface ) except AssertionError as e: report_diagnostic_event( @@ -1254,17 +1288,13 @@ class DataSourceAzure(sources.DataSource): nl_sock.close() # Teardown old network configuration. - self._ephemeral_dhcp_ctx.clean_network() - self._ephemeral_dhcp_ctx = None + self._teardown_ephemeral_networking() while not reprovision_data: - if self._ephemeral_dhcp_ctx is None: - self._ephemeral_dhcp_ctx = EphemeralDHCPv4WithReporting( - azure_ds_reporter - ) + if not self._is_ephemeral_networking_up(): dhcp_attempts += 1 try: - self._ephemeral_dhcp_ctx.obtain_lease() + self._setup_ephemeral_networking(timeout_minutes=5) except NoDHCPLeaseError: continue @@ -1283,8 +1313,7 @@ class DataSourceAzure(sources.DataSource): log_req_resp=False, ).contents except UrlError: - self._ephemeral_dhcp_ctx.clean_network() - self._ephemeral_dhcp_ctx = None + self._teardown_ephemeral_networking() continue report_diagnostic_event( @@ -1305,42 +1334,39 @@ class DataSourceAzure(sources.DataSource): @param description: A description of the error encountered. @return: The success status of sending the failure signal. """ - unknown_245_key = "unknown-245" - - try: - if ( - self.distro.networking.is_up(self.fallback_interface) - and self._ephemeral_dhcp_ctx - and self._ephemeral_dhcp_ctx.lease - and unknown_245_key in self._ephemeral_dhcp_ctx.lease - ): + if self._is_ephemeral_networking_up(): + try: report_diagnostic_event( "Using cached ephemeral dhcp context " "to report failure to Azure", logger_func=LOG.debug, ) report_failure_to_fabric( - dhcp_opts=self._ephemeral_dhcp_ctx.lease[unknown_245_key], + dhcp_opts=self._wireserver_endpoint, description=description, ) - self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral return True - except Exception as e: - report_diagnostic_event( - "Failed to report failure using " - "cached ephemeral dhcp context: %s" % e, - logger_func=LOG.error, - ) + except Exception as e: + report_diagnostic_event( + "Failed to report failure using " + "cached ephemeral dhcp context: %s" % e, + logger_func=LOG.error, + ) try: report_diagnostic_event( "Using new ephemeral dhcp to report failure to Azure", logger_func=LOG.debug, ) - with EphemeralDHCPv4WithReporting(azure_ds_reporter) as lease: - report_failure_to_fabric( - dhcp_opts=lease[unknown_245_key], description=description - ) + self._teardown_ephemeral_networking() + try: + self._setup_ephemeral_networking(timeout_minutes=20) + except NoDHCPLeaseError: + # Reporting failure will fail, but it will emit telemetry. + pass + report_failure_to_fabric( + dhcp_opts=self._wireserver_endpoint, description=description + ) return True except Exception as e: report_diagnostic_event( @@ -1350,16 +1376,15 @@ class DataSourceAzure(sources.DataSource): return False - def _report_ready(self, lease: dict) -> bool: + def _report_ready(self) -> bool: """Tells the fabric provisioning has completed. - @param lease: dhcp lease to use for sending the ready signal. @return: The success status of sending the ready signal. """ try: get_metadata_from_fabric( fallback_lease_file=None, - dhcp_opts=lease["unknown-245"], + dhcp_opts=self._wireserver_endpoint, iso_dev=self.iso_dev, ) return True @@ -1371,7 +1396,7 @@ class DataSourceAzure(sources.DataSource): ) return False - def _ppstype_from_imds(self, imds_md: dict = None) -> str: + def _ppstype_from_imds(self, imds_md: dict) -> Optional[str]: try: return imds_md["extended"]["compute"]["ppsType"] except Exception as e: @@ -1416,7 +1441,10 @@ class DataSourceAzure(sources.DataSource): ) def _reprovision(self): - """Initiate the reprovisioning workflow.""" + """Initiate the reprovisioning workflow. + + Ephemeral networking is up upon successful reprovisioning. + """ contents = self._poll_imds() with events.ReportEventStack( name="reprovisioning-read-azure-ovf", @@ -2206,7 +2234,6 @@ def _generate_network_config_from_fallback_config() -> dict: @azure_ds_telemetry_reporter def get_metadata_from_imds( - fallback_nic, retries, md_type=MetadataType.ALL, api_version=IMDS_VER_MIN, @@ -2215,12 +2242,9 @@ def get_metadata_from_imds( ): """Query Azure's instance metadata service, returning a dictionary. - If network is not up, setup ephemeral dhcp on fallback_nic to talk to the - IMDS. For more info on IMDS: + For more info on IMDS: https://docs.microsoft.com/en-us/azure/virtual-machines/windows/instance-metadata-service - @param fallback_nic: String. The name of the nic which requires active - network in order to query IMDS. @param retries: The number of retries of the IMDS_URL. @param md_type: Metadata type for IMDS request. @param api_version: IMDS api-version to use in the request. @@ -2234,18 +2258,14 @@ def get_metadata_from_imds( "func": _get_metadata_from_imds, "args": (retries, exc_cb, md_type, api_version, infinite), } - if net.is_up(fallback_nic): + try: return util.log_time(**kwargs) - else: - try: - with EphemeralDHCPv4WithReporting(azure_ds_reporter, fallback_nic): - return util.log_time(**kwargs) - except Exception as e: - report_diagnostic_event( - "exception while getting metadata: %s" % e, - logger_func=LOG.warning, - ) - raise + except Exception as e: + report_diagnostic_event( + "exception while getting metadata: %s" % e, + logger_func=LOG.warning, + ) + raise @azure_ds_telemetry_reporter @@ -2295,7 +2315,8 @@ def _get_metadata_from_imds( except json_decode_error as e: report_diagnostic_event( "Ignoring non-json IMDS instance metadata response: %s. " - "Loading non-json IMDS response failed: %s" % (str(response), e), + "Loading non-json IMDS response failed: %s" + % (response.contents, e), logger_func=LOG.warning, ) return {} diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index e0a1abb6..1a8cd34f 100755 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -25,7 +25,6 @@ from cloudinit import ( version, ) from cloudinit.net import dhcp -from cloudinit.net.dhcp import EphemeralDHCPv4 from cloudinit.reporting import events from cloudinit.settings import CFG_BUILTIN @@ -215,6 +214,7 @@ def report_diagnostic_event( msg: str, *, logger_func=None ) -> events.ReportingEvent: """Report a diagnostic event""" + print(msg) if callable(logger_func): logger_func(msg) evt = events.ReportingEvent( @@ -1243,21 +1243,4 @@ def dhcp_log_cb(out, err): ) -class EphemeralDHCPv4WithReporting(EphemeralDHCPv4): - def __init__(self, reporter, iface=None): - self.reporter = reporter - - super(EphemeralDHCPv4WithReporting, self).__init__( - iface=iface, dhcp_log_func=dhcp_log_cb - ) - - def __enter__(self): - with events.ReportEventStack( - name="obtain-dhcp-lease", - description="obtain dhcp lease", - parent=self.reporter, - ): - return super(EphemeralDHCPv4WithReporting, self).__enter__() - - # vi: ts=4 expandtab diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 26fed1c4..a6c43ea7 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -472,44 +472,15 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): dsaz.IMDS_URL ) - @mock.patch(MOCKPATH + "readurl") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up", autospec=True) - def test_get_metadata_does_not_dhcp_if_network_is_up( - self, m_net_is_up, m_dhcp, m_readurl - ): - """Do not perform DHCP setup when nic is already up.""" - m_net_is_up.return_value = True - m_readurl.return_value = url_helper.StringResponse( - json.dumps(NETWORK_METADATA).encode("utf-8") - ) - self.assertEqual( - NETWORK_METADATA, dsaz.get_metadata_from_imds("eth9", retries=3) - ) - - m_net_is_up.assert_called_with("eth9") - m_dhcp.assert_not_called() - self.assertIn( - "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time - self.logs.getvalue(), - ) - @mock.patch(MOCKPATH + "readurl", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up") - def test_get_metadata_uses_instance_url( - self, m_net_is_up, m_dhcp, m_readurl - ): + def test_get_metadata_uses_instance_url(self, m_readurl): """Make sure readurl is called with the correct url when accessing metadata""" - m_net_is_up.return_value = True m_readurl.return_value = url_helper.StringResponse( json.dumps(IMDS_NETWORK_METADATA).encode("utf-8") ) - dsaz.get_metadata_from_imds( - "eth0", retries=3, md_type=dsaz.MetadataType.ALL - ) + dsaz.get_metadata_from_imds(retries=3, md_type=dsaz.MetadataType.ALL) m_readurl.assert_called_with( "http://169.254.169.254/metadata/instance?api-version=2019-06-01", exception_cb=mock.ANY, @@ -520,20 +491,15 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): ) @mock.patch(MOCKPATH + "readurl", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up") - def test_get_network_metadata_uses_network_url( - self, m_net_is_up, m_dhcp, m_readurl - ): + def test_get_network_metadata_uses_network_url(self, m_readurl): """Make sure readurl is called with the correct url when accessing network metadata""" - m_net_is_up.return_value = True m_readurl.return_value = url_helper.StringResponse( json.dumps(IMDS_NETWORK_METADATA).encode("utf-8") ) dsaz.get_metadata_from_imds( - "eth0", retries=3, md_type=dsaz.MetadataType.NETWORK + retries=3, md_type=dsaz.MetadataType.NETWORK ) m_readurl.assert_called_with( "http://169.254.169.254/metadata/instance/network?api-version=" @@ -546,19 +512,15 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): ) @mock.patch(MOCKPATH + "readurl", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up") - def test_get_default_metadata_uses_instance_url( - self, m_net_is_up, m_dhcp, m_readurl - ): + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) + def test_get_default_metadata_uses_instance_url(self, m_dhcp, m_readurl): """Make sure readurl is called with the correct url when accessing metadata""" - m_net_is_up.return_value = True m_readurl.return_value = url_helper.StringResponse( json.dumps(IMDS_NETWORK_METADATA).encode("utf-8") ) - dsaz.get_metadata_from_imds("eth0", retries=3) + dsaz.get_metadata_from_imds(retries=3) m_readurl.assert_called_with( "http://169.254.169.254/metadata/instance?api-version=2019-06-01", exception_cb=mock.ANY, @@ -569,20 +531,14 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): ) @mock.patch(MOCKPATH + "readurl", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up") - def test_get_metadata_uses_extended_url( - self, m_net_is_up, m_dhcp, m_readurl - ): + def test_get_metadata_uses_extended_url(self, m_readurl): """Make sure readurl is called with the correct url when accessing metadata""" - m_net_is_up.return_value = True m_readurl.return_value = url_helper.StringResponse( json.dumps(IMDS_NETWORK_METADATA).encode("utf-8") ) dsaz.get_metadata_from_imds( - "eth0", retries=3, md_type=dsaz.MetadataType.ALL, api_version="2021-08-01", @@ -598,23 +554,16 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): ) @mock.patch(MOCKPATH + "readurl", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up", autospec=True) - def test_get_metadata_performs_dhcp_when_network_is_down( - self, m_net_is_up, m_dhcp, m_readurl - ): + def test_get_metadata_performs_dhcp_when_network_is_down(self, m_readurl): """Perform DHCP setup when nic is not up.""" - m_net_is_up.return_value = False m_readurl.return_value = url_helper.StringResponse( json.dumps(NETWORK_METADATA).encode("utf-8") ) self.assertEqual( - NETWORK_METADATA, dsaz.get_metadata_from_imds("eth9", retries=2) + NETWORK_METADATA, dsaz.get_metadata_from_imds(retries=2) ) - m_net_is_up.assert_called_with("eth9") - m_dhcp.assert_called_with(mock.ANY, "eth9") self.assertIn( "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time self.logs.getvalue(), @@ -630,10 +579,7 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): ) @mock.patch("cloudinit.url_helper.time.sleep") - @mock.patch(MOCKPATH + "net.is_up", autospec=True) - def test_get_metadata_from_imds_empty_when_no_imds_present( - self, m_net_is_up, m_sleep - ): + def test_get_metadata_from_imds_empty_when_no_imds_present(self, m_sleep): """Return empty dict when IMDS network metadata is absent.""" httpretty.register_uri( httpretty.GET, @@ -642,11 +588,8 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): status=404, ) - m_net_is_up.return_value = True # skips dhcp + self.assertEqual({}, dsaz.get_metadata_from_imds(retries=2)) - self.assertEqual({}, dsaz.get_metadata_from_imds("eth9", retries=2)) - - m_net_is_up.assert_called_with("eth9") self.assertEqual([mock.call(1), mock.call(1)], m_sleep.call_args_list) self.assertIn( "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time @@ -655,9 +598,8 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): @mock.patch("requests.Session.request") @mock.patch("cloudinit.url_helper.time.sleep") - @mock.patch(MOCKPATH + "net.is_up", autospec=True) def test_get_metadata_from_imds_retries_on_timeout( - self, m_net_is_up, m_sleep, m_request + self, m_sleep, m_request ): """Retry IMDS network metadata on timeout errors.""" @@ -674,11 +616,8 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): body=retry_callback, ) - m_net_is_up.return_value = True # skips dhcp - - self.assertEqual({}, dsaz.get_metadata_from_imds("eth9", retries=3)) + self.assertEqual({}, dsaz.get_metadata_from_imds(retries=3)) - m_net_is_up.assert_called_with("eth9") self.assertEqual([mock.call(1)] * 3, m_sleep.call_args_list) self.assertIn( "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time @@ -710,10 +649,12 @@ class TestAzureDataSource(CiTestCase): self.m_dhcp = self.patches.enter_context( mock.patch.object( dsaz, - "EphemeralDHCPv4WithReporting", + "EphemeralDHCPv4", autospec=True, ) ) + self.m_dhcp.return_value.lease = {} + self.m_dhcp.return_value.iface = "eth4" self.m_get_metadata_from_imds = self.patches.enter_context( mock.patch.object( @@ -1254,7 +1195,8 @@ scbus-1 on xpt0 bus 0 "cloudinit.sources.helpers.netlink.wait_for_media_disconnect_connect" ) @mock.patch( - "cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready" + "cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready", + return_value=True, ) @mock.patch("cloudinit.sources.DataSourceAzure.readurl") def test_crawl_metadata_on_reprovision_reports_ready_using_lease( @@ -1268,34 +1210,24 @@ scbus-1 on xpt0 bus 0 data = {"ovfcontent": ovfenv, "sys_cfg": {}} dsrc = self._get_ds(data) - with mock.patch.object( - dsrc.distro.networking, "is_up" - ) as m_dsrc_distro_networking_is_up: - - # For this mock, net should not be up, - # so that cached ephemeral won't be used. - # This is so that a NEW ephemeral dhcp lease will be discovered - # and used instead. - m_dsrc_distro_networking_is_up.return_value = False + lease = { + "interface": "eth9", + "fixed-address": "192.168.2.9", + "routers": "192.168.2.1", + "subnet-mask": "255.255.255.0", + "unknown-245": "624c3620", + } + self.m_dhcp.return_value.obtain_lease.return_value = lease + m_media_switch.return_value = None - lease = { - "interface": "eth9", - "fixed-address": "192.168.2.9", - "routers": "192.168.2.1", - "subnet-mask": "255.255.255.0", - "unknown-245": "624c3620", - } - self.m_dhcp.return_value.__enter__.return_value = lease - m_media_switch.return_value = None + reprovision_ovfenv = construct_valid_ovf_env() + m_readurl.return_value = url_helper.StringResponse( + reprovision_ovfenv.encode("utf-8") + ) - reprovision_ovfenv = construct_valid_ovf_env() - m_readurl.return_value = url_helper.StringResponse( - reprovision_ovfenv.encode("utf-8") - ) + dsrc.crawl_metadata() - dsrc.crawl_metadata() - self.assertEqual(2, m_report_ready.call_count) - m_report_ready.assert_called_with(lease=lease) + assert m_report_ready.mock_calls == [mock.call(), mock.call()] def test_waagent_d_has_0700_perms(self): # we expect /var/lib/waagent to be created 0700 @@ -1673,12 +1605,12 @@ scbus-1 on xpt0 bus 0 def test_dsaz_report_ready_returns_true_when_report_succeeds(self): dsrc = self._get_ds({"ovfcontent": construct_valid_ovf_env()}) - self.assertTrue(dsrc._report_ready(lease=mock.MagicMock())) + self.assertTrue(dsrc._report_ready()) def test_dsaz_report_ready_returns_false_and_does_not_propagate_exc(self): dsrc = self._get_ds({"ovfcontent": construct_valid_ovf_env()}) self.m_get_metadata_from_fabric.side_effect = Exception - self.assertFalse(dsrc._report_ready(lease=mock.MagicMock())) + self.assertFalse(dsrc._report_ready()) def test_dsaz_report_failure_returns_true_when_report_succeeds(self): dsrc = self._get_ds({"ovfcontent": construct_valid_ovf_env()}) @@ -1750,46 +1682,31 @@ scbus-1 on xpt0 bus 0 with mock.patch.object( dsrc, "crawl_metadata" ) as m_crawl_metadata, mock.patch.object( - dsrc, "_ephemeral_dhcp_ctx" - ) as m_ephemeral_dhcp_ctx, mock.patch.object( - dsrc.distro.networking, "is_up" - ) as m_dsrc_distro_networking_is_up: + dsrc, "_wireserver_endpoint", return_value="test-ep" + ) as m_wireserver_endpoint: # mock crawl metadata failure to cause report failure m_crawl_metadata.side_effect = Exception - # setup mocks to allow using cached ephemeral dhcp lease - m_dsrc_distro_networking_is_up.return_value = True - test_lease_dhcp_option_245 = "test_lease_dhcp_option_245" - test_lease = {"unknown-245": test_lease_dhcp_option_245} - m_ephemeral_dhcp_ctx.lease = test_lease - self.assertTrue(dsrc._report_failure()) # ensure called with cached ephemeral dhcp lease option 245 self.m_report_failure_to_fabric.assert_called_once_with( - description=mock.ANY, dhcp_opts=test_lease_dhcp_option_245 + description=mock.ANY, dhcp_opts=m_wireserver_endpoint ) - # ensure cached ephemeral is cleaned - self.assertEqual(1, m_ephemeral_dhcp_ctx.clean_network.call_count) - def test_dsaz_report_failure_no_net_uses_new_ephemeral_dhcp_lease(self): dsrc = self._get_ds({"ovfcontent": construct_valid_ovf_env()}) - with mock.patch.object( - dsrc, "crawl_metadata" - ) as m_crawl_metadata, mock.patch.object( - dsrc.distro.networking, "is_up" - ) as m_dsrc_distro_networking_is_up: + with mock.patch.object(dsrc, "crawl_metadata") as m_crawl_metadata: # mock crawl metadata failure to cause report failure m_crawl_metadata.side_effect = Exception - # net is not up and cannot use cached ephemeral dhcp - m_dsrc_distro_networking_is_up.return_value = False - # setup ephemeral dhcp lease discovery mock test_lease_dhcp_option_245 = "test_lease_dhcp_option_245" - test_lease = {"unknown-245": test_lease_dhcp_option_245} - self.m_dhcp.return_value.__enter__.return_value = test_lease + test_lease = { + "unknown-245": test_lease_dhcp_option_245, + "interface": "eth0", + } + self.m_dhcp.return_value.obtain_lease.return_value = test_lease self.assertTrue(dsrc._report_failure()) @@ -2119,14 +2036,12 @@ scbus-1 on xpt0 bus 0 assert m_get_metadata_from_imds.mock_calls == [ mock.call( - fallback_nic="eth9", retries=0, md_type=dsaz.MetadataType.ALL, api_version="2021-08-01", exc_cb=mock.ANY, ), mock.call( - fallback_nic="eth9", retries=10, md_type=dsaz.MetadataType.ALL, api_version="2019-06-01", @@ -2151,7 +2066,6 @@ scbus-1 on xpt0 bus 0 assert m_get_metadata_from_imds.mock_calls == [ mock.call( - fallback_nic="eth9", retries=0, md_type=dsaz.MetadataType.ALL, api_version="2021-08-01", @@ -2862,11 +2776,10 @@ class TestPreprovisioningHotAttachNics(CiTestCase): @mock.patch(MOCKPATH + "util.write_file", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure.fallback_interface") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure._report_ready") @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") def test_detect_nic_attach_reports_ready_and_waits_for_detach( - self, m_detach, m_report_ready, m_dhcp, m_fallback_if, m_writefile + self, m_detach, m_report_ready, m_fallback_if, m_writefile ): """Report ready first and then wait for nic detach""" dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) @@ -2875,14 +2788,13 @@ class TestPreprovisioningHotAttachNics(CiTestCase): self.assertEqual(1, m_report_ready.call_count) self.assertEqual(1, m_detach.call_count) self.assertEqual(1, m_writefile.call_count) - self.assertEqual(1, m_dhcp.call_count) m_writefile.assert_called_with( dsaz.REPORTED_READY_MARKER_FILE, mock.ANY ) @mock.patch("os.path.isfile") @mock.patch(MOCKPATH + "DataSourceAzure.fallback_interface") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure._report_ready") @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") def test_detect_nic_attach_skips_report_ready_when_marker_present( @@ -2903,7 +2815,7 @@ class TestPreprovisioningHotAttachNics(CiTestCase): @mock.patch("os.path.isfile") @mock.patch(MOCKPATH + "DataSourceAzure.fallback_interface") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting") + @mock.patch(MOCKPATH + "EphemeralDHCPv4") @mock.patch(MOCKPATH + "DataSourceAzure._report_ready") @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") def test_detect_nic_attach_skips_nic_detach_when_marker_present( @@ -2923,7 +2835,7 @@ class TestPreprovisioningHotAttachNics(CiTestCase): @mock.patch("cloudinit.sources.helpers.netlink.wait_for_nic_attach_event") @mock.patch("cloudinit.sources.net.find_fallback_nic") @mock.patch(MOCKPATH + "get_metadata_from_imds") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") @mock.patch("os.path.isfile") def test_wait_for_nic_attach_if_no_fallback_interface( @@ -2967,7 +2879,7 @@ class TestPreprovisioningHotAttachNics(CiTestCase): @mock.patch("cloudinit.sources.helpers.netlink.wait_for_nic_attach_event") @mock.patch("cloudinit.sources.net.find_fallback_nic") @mock.patch(MOCKPATH + "DataSourceAzure.get_imds_data_with_api_fallback") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") @mock.patch("os.path.isfile") def test_wait_for_nic_attach_multinic_attach( @@ -3020,7 +2932,7 @@ class TestPreprovisioningHotAttachNics(CiTestCase): @mock.patch("cloudinit.url_helper.time.sleep", autospec=True) @mock.patch("requests.Session.request", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) def test_check_if_nic_is_primary_retries_on_failures( self, m_dhcpv4, m_request, m_sleep ): @@ -3043,16 +2955,13 @@ class TestPreprovisioningHotAttachNics(CiTestCase): ] } - dhcp_ctx = mock.MagicMock(lease=lease) - dhcp_ctx.obtain_lease.return_value = lease - m_dhcpv4.return_value = dhcp_ctx - m_req = mock.Mock(content=json.dumps(md)) m_request.side_effect = [ requests.Timeout("Fake connection timeout"), requests.ConnectionError("Fake Network Unreachable"), m_req, ] + m_dhcpv4.return_value.lease = lease is_primary, expected_nic_count = dsa._check_if_nic_is_primary("eth0") self.assertEqual(True, is_primary) @@ -3162,13 +3071,14 @@ class TestPreprovisioningHotAttachNics(CiTestCase): # dsa._wait_for_all_nics_ready() +@mock.patch("cloudinit.net.find_fallback_nic", return_value="eth9") @mock.patch("cloudinit.net.dhcp.EphemeralIPv4Network") @mock.patch("cloudinit.net.dhcp.maybe_perform_dhcp_discovery") @mock.patch( "cloudinit.sources.helpers.netlink.wait_for_media_disconnect_connect" ) @mock.patch("requests.Session.request") -@mock.patch(MOCKPATH + "DataSourceAzure._report_ready") +@mock.patch(MOCKPATH + "DataSourceAzure._report_ready", return_value=True) class TestPreprovisioningPollIMDS(CiTestCase): def setUp(self): super(TestPreprovisioningPollIMDS, self).setUp() @@ -3185,6 +3095,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): m_media_switch, m_dhcp, m_net, + m_fallback, ): """The poll_imds will retry DHCP on IMDS timeout.""" report_file = self.tmp_path("report_marker", self.tmp) @@ -3220,8 +3131,9 @@ class TestPreprovisioningPollIMDS(CiTestCase): dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) with mock.patch(MOCKPATH + "REPORTED_READY_MARKER_FILE", report_file): dsa._poll_imds() - self.assertEqual(m_report_ready.call_count, 1) - m_report_ready.assert_called_with(lease=lease) + + assert m_report_ready.mock_calls == [mock.call()] + self.assertEqual(3, m_dhcp.call_count, "Expected 3 DHCP calls") self.assertEqual(4, self.tries, "Expected 4 total reads from IMDS") @@ -3234,6 +3146,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): m_media_switch, m_dhcp, m_net, + m_fallback, ): """The poll_imds function should reuse the dhcp ctx if it is already present. This happens when we wait for nic to be hot-attached before @@ -3243,14 +3156,14 @@ class TestPreprovisioningPollIMDS(CiTestCase): report_file = self.tmp_path("report_marker", self.tmp) m_isfile.return_value = True dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) - dsa._ephemeral_dhcp_ctx = "Dummy dhcp ctx" + dsa._ephemeral_dhcp_ctx = mock.Mock(lease={}) with mock.patch(MOCKPATH + "REPORTED_READY_MARKER_FILE", report_file): dsa._poll_imds() self.assertEqual(0, m_dhcp.call_count) self.assertEqual(0, m_media_switch.call_count) @mock.patch("os.path.isfile") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) def test_poll_imds_does_dhcp_on_retries_if_ctx_present( self, m_ephemeral_dhcpv4, @@ -3260,6 +3173,7 @@ class TestPreprovisioningPollIMDS(CiTestCase): m_media_switch, m_dhcp, m_net, + m_fallback, ): """The poll_imds function should reuse the dhcp ctx if it is already present. This happens when we wait for nic to be hot-attached before @@ -3292,7 +3206,13 @@ class TestPreprovisioningPollIMDS(CiTestCase): self.assertEqual(2, m_request.call_count) def test_does_not_poll_imds_report_ready_when_marker_file_exists( - self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net + self, + m_report_ready, + m_request, + m_media_switch, + m_dhcp, + m_net, + m_fallback, ): """poll_imds should not call report ready when the reported ready marker file exists""" @@ -3314,7 +3234,13 @@ class TestPreprovisioningPollIMDS(CiTestCase): self.assertEqual(m_report_ready.call_count, 0) def test_poll_imds_report_ready_success_writes_marker_file( - self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net + self, + m_report_ready, + m_request, + m_media_switch, + m_dhcp, + m_net, + m_fallback, ): """poll_imds should write the report_ready marker file if reporting ready succeeds""" @@ -3337,7 +3263,13 @@ class TestPreprovisioningPollIMDS(CiTestCase): self.assertTrue(os.path.exists(report_file)) def test_poll_imds_report_ready_failure_raises_exc_and_doesnt_write_marker( - self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net + self, + m_report_ready, + m_request, + m_media_switch, + m_dhcp, + m_net, + m_fallback, ): """poll_imds should write the report_ready marker file if reporting ready succeeds""" |