From 3be666306023caf6f236464fb655741b6605bdf7 Mon Sep 17 00:00:00 2001 From: Thomas Stringer Date: Wed, 3 Mar 2021 11:07:43 -0500 Subject: Add flexibility to IMDS api-version (#793) Add flexibility to IMDS api-version by having both a desired IMDS api-version and a minimum api-version. The desired api-version will be used first, and if that fails it will fall back to the minimum api-version. --- cloudinit/sources/DataSourceAzure.py | 113 ++++++++++++++++++++++++++++------- 1 file changed, 90 insertions(+), 23 deletions(-) (limited to 'cloudinit/sources') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index cee630f7..6cae9e82 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -78,17 +78,15 @@ AGENT_SEED_DIR = '/var/lib/waagent' # In the event where the IMDS primary server is not # available, it takes 1s to fallback to the secondary one IMDS_TIMEOUT_IN_SECONDS = 2 -IMDS_URL = "http://169.254.169.254/metadata/" -IMDS_VER = "2019-06-01" -IMDS_VER_PARAM = "api-version={}".format(IMDS_VER) +IMDS_URL = "http://169.254.169.254/metadata" +IMDS_VER_MIN = "2019-06-01" +IMDS_VER_WANT = "2020-09-01" class metadata_type(Enum): - compute = "{}instance?{}".format(IMDS_URL, IMDS_VER_PARAM) - network = "{}instance/network?{}".format(IMDS_URL, - IMDS_VER_PARAM) - reprovisiondata = "{}reprovisiondata?{}".format(IMDS_URL, - IMDS_VER_PARAM) + compute = "{}/instance".format(IMDS_URL) + network = "{}/instance/network".format(IMDS_URL) + reprovisiondata = "{}/reprovisiondata".format(IMDS_URL) PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0" @@ -349,6 +347,8 @@ class DataSourceAzure(sources.DataSource): self.update_events['network'].add(EventType.BOOT) self._ephemeral_dhcp_ctx = None + self.failed_desired_api_version = False + def __str__(self): root = sources.DataSource.__str__(self) return "%s [seed=%s]" % (root, self.seed) @@ -520,8 +520,10 @@ class DataSourceAzure(sources.DataSource): self._wait_for_all_nics_ready() ret = self._reprovision() - imds_md = get_metadata_from_imds( - self.fallback_interface, retries=10) + imds_md = self.get_imds_data_with_api_fallback( + self.fallback_interface, + retries=10 + ) (md, userdata_raw, cfg, files) = ret self.seed = cdev crawled_data.update({ @@ -652,6 +654,57 @@ class DataSourceAzure(sources.DataSource): self.ds_cfg['data_dir'], crawled_data['files'], dirmode=0o700) return True + @azure_ds_telemetry_reporter + def get_imds_data_with_api_fallback( + self, + fallback_nic, + retries, + md_type=metadata_type.compute): + """ + Wrapper for get_metadata_from_imds so that we can have flexibility + in which IMDS api-version we use. If a particular instance of IMDS + does not have the api version that is desired, we want to make + this fault tolerant and fall back to a good known minimum api + version. + """ + + if not self.failed_desired_api_version: + for _ in range(retries): + try: + LOG.info( + "Attempting IMDS api-version: %s", + IMDS_VER_WANT + ) + return get_metadata_from_imds( + fallback_nic=fallback_nic, + retries=0, + md_type=md_type, + api_version=IMDS_VER_WANT + ) + except UrlError as err: + LOG.info( + "UrlError with IMDS api-version: %s", + IMDS_VER_WANT + ) + if err.code == 400: + log_msg = "Fall back to IMDS api-version: {}".format( + IMDS_VER_MIN + ) + report_diagnostic_event( + log_msg, + logger_func=LOG.info + ) + self.failed_desired_api_version = True + break + + LOG.info("Using IMDS api-version: %s", IMDS_VER_MIN) + return get_metadata_from_imds( + fallback_nic=fallback_nic, + retries=retries, + md_type=md_type, + api_version=IMDS_VER_MIN + ) + def device_name_to_device(self, name): return self.ds_cfg['disk_aliases'].get(name) @@ -880,10 +933,11 @@ class DataSourceAzure(sources.DataSource): # primary nic is being attached first helps here. Otherwise each nic # could add several seconds of delay. try: - imds_md = get_metadata_from_imds( + imds_md = self.get_imds_data_with_api_fallback( ifname, 5, - metadata_type.network) + metadata_type.network + ) except Exception as e: LOG.warning( "Failed to get network metadata using nic %s. Attempt to " @@ -1017,7 +1071,10 @@ class DataSourceAzure(sources.DataSource): def _poll_imds(self): """Poll IMDS for the new provisioning data until we get a valid response. Then return the returned JSON object.""" - url = metadata_type.reprovisiondata.value + url = "{}?api-version={}".format( + metadata_type.reprovisiondata.value, + IMDS_VER_MIN + ) headers = {"Metadata": "true"} nl_sock = None report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE)) @@ -2059,7 +2116,8 @@ def _generate_network_config_from_fallback_config() -> dict: @azure_ds_telemetry_reporter def get_metadata_from_imds(fallback_nic, retries, - md_type=metadata_type.compute): + md_type=metadata_type.compute, + api_version=IMDS_VER_MIN): """Query Azure's instance metadata service, returning a dictionary. If network is not up, setup ephemeral dhcp on fallback_nic to talk to the @@ -2069,13 +2127,16 @@ def get_metadata_from_imds(fallback_nic, @param fallback_nic: String. The name of the nic which requires active network in order to query IMDS. @param retries: The number of retries of the IMDS_URL. + @param md_type: Metadata type for IMDS request. + @param api_version: IMDS api-version to use in the request. @return: A dict of instance metadata containing compute and network info. """ kwargs = {'logfunc': LOG.debug, 'msg': 'Crawl of Azure Instance Metadata Service (IMDS)', - 'func': _get_metadata_from_imds, 'args': (retries, md_type,)} + 'func': _get_metadata_from_imds, + 'args': (retries, md_type, api_version,)} if net.is_up(fallback_nic): return util.log_time(**kwargs) else: @@ -2091,20 +2152,26 @@ def get_metadata_from_imds(fallback_nic, @azure_ds_telemetry_reporter -def _get_metadata_from_imds(retries, md_type=metadata_type.compute): - - url = md_type.value +def _get_metadata_from_imds( + retries, + md_type=metadata_type.compute, + api_version=IMDS_VER_MIN): + url = "{}?api-version={}".format(md_type.value, api_version) headers = {"Metadata": "true"} try: response = readurl( url, timeout=IMDS_TIMEOUT_IN_SECONDS, headers=headers, retries=retries, exception_cb=retry_on_url_exc) except Exception as e: - report_diagnostic_event( - 'Ignoring IMDS instance metadata. ' - 'Get metadata from IMDS failed: %s' % e, - logger_func=LOG.warning) - return {} + # pylint:disable=no-member + if isinstance(e, UrlError) and e.code == 400: + raise + else: + report_diagnostic_event( + 'Ignoring IMDS instance metadata. ' + 'Get metadata from IMDS failed: %s' % e, + logger_func=LOG.warning) + return {} try: from json.decoder import JSONDecodeError json_decode_error = JSONDecodeError -- cgit v1.2.3