diff options
-rwxr-xr-x | cloudinit/sources/DataSourceAzure.py | 120 | ||||
-rwxr-xr-x | cloudinit/sources/helpers/azure.py | 99 | ||||
-rw-r--r-- | tests/unittests/test_reporting_hyperv.py | 22 |
3 files changed, 142 insertions, 99 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index fc32f8b1..8858fbd5 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -299,9 +299,9 @@ def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'): try: set_hostname(temp_hostname, hostname_command) except Exception as e: - msg = 'Failed setting temporary hostname: %s' % e - report_diagnostic_event(msg) - LOG.warning(msg) + report_diagnostic_event( + 'Failed setting temporary hostname: %s' % e, + logger_func=LOG.warning) yield None return try: @@ -356,7 +356,9 @@ class DataSourceAzure(sources.DataSource): cfg=cfg, prev_hostname=previous_hn) except Exception as e: - LOG.warning("Failed publishing hostname: %s", e) + report_diagnostic_event( + "Failed publishing hostname: %s" % e, + logger_func=LOG.warning) util.logexc(LOG, "handling set_hostname failed") return False @@ -454,24 +456,23 @@ class DataSourceAzure(sources.DataSource): except NonAzureDataSource: report_diagnostic_event( - "Did not find Azure data source in %s" % cdev) + "Did not find Azure data source in %s" % cdev, + logger_func=LOG.debug) continue except BrokenAzureDataSource as exc: msg = 'BrokenAzureDataSource: %s' % exc - report_diagnostic_event(msg) + report_diagnostic_event(msg, logger_func=LOG.error) raise sources.InvalidMetaDataException(msg) except util.MountFailedError: - msg = '%s was not mountable' % cdev - report_diagnostic_event(msg) - LOG.warning(msg) + report_diagnostic_event( + '%s was not mountable' % cdev, logger_func=LOG.warning) continue perform_reprovision = reprovision or self._should_reprovision(ret) if perform_reprovision: if util.is_FreeBSD(): msg = "Free BSD is not supported for PPS VMs" - LOG.error(msg) - report_diagnostic_event(msg) + report_diagnostic_event(msg, logger_func=LOG.error) raise sources.InvalidMetaDataException(msg) ret = self._reprovision() imds_md = get_metadata_from_imds( @@ -486,16 +487,18 @@ class DataSourceAzure(sources.DataSource): 'userdata_raw': userdata_raw}) found = cdev - LOG.debug("found datasource in %s", cdev) + report_diagnostic_event( + 'found datasource in %s' % cdev, logger_func=LOG.debug) break if not found: msg = 'No Azure metadata found' - report_diagnostic_event(msg) + report_diagnostic_event(msg, logger_func=LOG.error) raise sources.InvalidMetaDataException(msg) if found == ddir: - LOG.debug("using files cached in %s", ddir) + report_diagnostic_event( + "using files cached in %s" % ddir, logger_func=LOG.debug) seed = _get_random_seed() if seed: @@ -516,7 +519,8 @@ class DataSourceAzure(sources.DataSource): self._report_ready(lease=lease) except Exception as e: report_diagnostic_event( - "exception while reporting ready: %s" % e) + "exception while reporting ready: %s" % e, + logger_func=LOG.error) raise return crawled_data @@ -605,14 +609,14 @@ class DataSourceAzure(sources.DataSource): except KeyError: log_msg = 'Unable to get keys from IMDS, falling back to OVF' LOG.debug(log_msg) - report_diagnostic_event(log_msg) + report_diagnostic_event(log_msg, logger_func=LOG.debug) try: ssh_keys = self.metadata['public-keys'] LOG.debug('Retrieved keys from OVF') except KeyError: log_msg = 'No keys available from OVF' LOG.debug(log_msg) - report_diagnostic_event(log_msg) + report_diagnostic_event(log_msg, logger_func=LOG.debug) return ssh_keys @@ -666,16 +670,14 @@ class DataSourceAzure(sources.DataSource): if self.imds_poll_counter == self.imds_logging_threshold: # Reducing the logging frequency as we are polling IMDS self.imds_logging_threshold *= 2 - LOG.debug("Call to IMDS with arguments %s failed " - "with status code %s after %s retries", - msg, exception.code, self.imds_poll_counter) LOG.debug("Backing off logging threshold for the same " "exception to %d", self.imds_logging_threshold) report_diagnostic_event("poll IMDS with %s failed. " "Exception: %s and code: %s" % (msg, exception.cause, - exception.code)) + exception.code), + logger_func=LOG.debug) self.imds_poll_counter += 1 return True else: @@ -684,12 +686,15 @@ class DataSourceAzure(sources.DataSource): report_diagnostic_event("poll IMDS with %s failed. " "Exception: %s and code: %s" % (msg, exception.cause, - exception.code)) + exception.code), + logger_func=LOG.warning) return False - LOG.debug("poll IMDS failed with an unexpected exception: %s", - exception) - return False + report_diagnostic_event( + "poll IMDS failed with an " + "unexpected exception: %s" % exception, + logger_func=LOG.warning) + return False LOG.debug("Wait for vnetswitch to happen") while True: @@ -709,8 +714,9 @@ class DataSourceAzure(sources.DataSource): try: nl_sock = netlink.create_bound_netlink_socket() except netlink.NetlinkCreateSocketError as e: - report_diagnostic_event(e) - LOG.warning(e) + report_diagnostic_event( + 'Failed to create bound netlink socket: %s' % e, + logger_func=LOG.warning) self._ephemeral_dhcp_ctx.clean_network() break @@ -729,9 +735,10 @@ class DataSourceAzure(sources.DataSource): try: netlink.wait_for_media_disconnect_connect( nl_sock, lease['interface']) - except AssertionError as error: - report_diagnostic_event(error) - LOG.error(error) + except AssertionError as e: + report_diagnostic_event( + 'Error while waiting for vnet switch: %s' % e, + logger_func=LOG.error) break vnet_switched = True @@ -757,9 +764,11 @@ class DataSourceAzure(sources.DataSource): if vnet_switched: report_diagnostic_event("attempted dhcp %d times after reuse" % - dhcp_attempts) + dhcp_attempts, + logger_func=LOG.debug) report_diagnostic_event("polled imds %d times after reuse" % - self.imds_poll_counter) + self.imds_poll_counter, + logger_func=LOG.debug) return return_val @@ -768,10 +777,10 @@ class DataSourceAzure(sources.DataSource): """Tells the fabric provisioning has completed """ try: get_metadata_from_fabric(None, lease['unknown-245']) - except Exception: - LOG.warning( - "Error communicating with Azure fabric; You may experience." - "connectivity issues.", exc_info=True) + except Exception as e: + report_diagnostic_event( + "Error communicating with Azure fabric; You may experience " + "connectivity issues: %s" % e, logger_func=LOG.warning) def _should_reprovision(self, ret): """Whether or not we should poll IMDS for reprovisioning data. @@ -849,10 +858,7 @@ class DataSourceAzure(sources.DataSource): except Exception as e: report_diagnostic_event( "Error communicating with Azure fabric; You may experience " - "connectivity issues: %s" % e) - LOG.warning( - "Error communicating with Azure fabric; You may experience " - "connectivity issues.", exc_info=True) + "connectivity issues: %s" % e, logger_func=LOG.warning) return False util.del_file(REPORTED_READY_MARKER_FILE) @@ -1017,9 +1023,10 @@ def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120, log_pre="Azure ephemeral disk: ") if missing: - LOG.warning("ephemeral device '%s' did" - " not appear after %d seconds.", - devpath, maxwait) + report_diagnostic_event( + "ephemeral device '%s' did not appear after %d seconds." % + (devpath, maxwait), + logger_func=LOG.warning) return result = False @@ -1104,7 +1111,9 @@ def pubkeys_from_crt_files(flist): errors.append(fname) if errors: - LOG.warning("failed to convert the crt files to pubkey: %s", errors) + report_diagnostic_event( + "failed to convert the crt files to pubkey: %s" % errors, + logger_func=LOG.warning) return pubkeys @@ -1216,7 +1225,7 @@ def read_azure_ovf(contents): dom = minidom.parseString(contents) except Exception as e: error_str = "Invalid ovf-env.xml: %s" % e - report_diagnostic_event(error_str) + report_diagnostic_event(error_str, logger_func=LOG.warning) raise BrokenAzureDataSource(error_str) from e results = find_child(dom.documentElement, @@ -1523,7 +1532,9 @@ def get_metadata_from_imds(fallback_nic, retries): azure_ds_reporter, fallback_nic): return util.log_time(**kwargs) except Exception as e: - report_diagnostic_event("exception while getting metadata: %s" % e) + report_diagnostic_event( + "exception while getting metadata: %s" % e, + logger_func=LOG.warning) raise @@ -1537,9 +1548,10 @@ def _get_metadata_from_imds(retries): url, timeout=IMDS_TIMEOUT_IN_SECONDS, headers=headers, retries=retries, exception_cb=retry_on_url_exc) except Exception as e: - msg = 'Ignoring IMDS instance metadata: %s' % e - report_diagnostic_event(msg) - LOG.debug(msg) + report_diagnostic_event( + 'Ignoring IMDS instance metadata. ' + 'Get metadata from IMDS failed: %s' % e, + logger_func=LOG.warning) return {} try: from json.decoder import JSONDecodeError @@ -1550,9 +1562,10 @@ def _get_metadata_from_imds(retries): try: return util.load_json(str(response)) except json_decode_error as e: - report_diagnostic_event('non-json imds response' % e) - LOG.warning( - 'Ignoring non-json IMDS instance metadata: %s', str(response)) + report_diagnostic_event( + 'Ignoring non-json IMDS instance metadata response: %s. ' + 'Loading non-json IMDS response failed: %s' % (str(response), e), + logger_func=LOG.warning) return {} @@ -1604,9 +1617,8 @@ def _is_platform_viable(seed_dir): if asset_tag == AZURE_CHASSIS_ASSET_TAG: return True msg = "Non-Azure DMI asset tag '%s' discovered." % asset_tag - LOG.debug(msg) evt.description = msg - report_diagnostic_event(msg) + report_diagnostic_event(msg, logger_func=LOG.debug) if os.path.exists(os.path.join(seed_dir, 'ovf-env.xml')): return True return False diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index 79445a81..560cadba 100755 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -180,12 +180,15 @@ def get_system_info(): return evt -def report_diagnostic_event(str): +def report_diagnostic_event( + msg: str, *, logger_func=None) -> events.ReportingEvent: """Report a diagnostic event""" + if callable(logger_func): + logger_func(msg) evt = events.ReportingEvent( DIAGNOSTIC_EVENT_TYPE, 'diagnostic message', - str, events.DEFAULT_EVENT_ORIGIN) - events.report_event(evt) + msg, events.DEFAULT_EVENT_ORIGIN) + events.report_event(evt, excluded_handler_types={"log"}) # return the event for unit testing purpose return evt @@ -215,7 +218,8 @@ def push_log_to_kvp(file_name=CFG_BUILTIN['def_log_file']): log_pushed_to_kvp = bool(os.path.isfile(LOG_PUSHED_TO_KVP_MARKER_FILE)) if log_pushed_to_kvp: - report_diagnostic_event("cloud-init.log is already pushed to KVP") + report_diagnostic_event( + "cloud-init.log is already pushed to KVP", logger_func=LOG.debug) return LOG.debug("Dumping cloud-init.log file to KVP") @@ -225,13 +229,15 @@ def push_log_to_kvp(file_name=CFG_BUILTIN['def_log_file']): seek_index = max(f.tell() - MAX_LOG_TO_KVP_LENGTH, 0) report_diagnostic_event( "Dumping last {} bytes of cloud-init.log file to KVP".format( - f.tell() - seek_index)) + f.tell() - seek_index), + logger_func=LOG.debug) f.seek(seek_index, os.SEEK_SET) report_compressed_event("cloud-init.log", f.read()) util.write_file(LOG_PUSHED_TO_KVP_MARKER_FILE, '') except Exception as ex: - report_diagnostic_event("Exception when dumping log file: %s" % - repr(ex)) + report_diagnostic_event( + "Exception when dumping log file: %s" % repr(ex), + logger_func=LOG.warning) @contextmanager @@ -305,9 +311,9 @@ class GoalState: try: self.root = ElementTree.fromstring(unparsed_xml) except ElementTree.ParseError as e: - msg = 'Failed to parse GoalState XML: %s' - LOG.warning(msg, e) - report_diagnostic_event(msg % (e,)) + report_diagnostic_event( + 'Failed to parse GoalState XML: %s' % e, + logger_func=LOG.warning) raise self.container_id = self._text_from_xpath('./Container/ContainerId') @@ -317,9 +323,8 @@ class GoalState: for attr in ("container_id", "instance_id", "incarnation"): if getattr(self, attr) is None: - msg = 'Missing %s in GoalState XML' - LOG.warning(msg, attr) - report_diagnostic_event(msg % (attr,)) + msg = 'Missing %s in GoalState XML' % attr + report_diagnostic_event(msg, logger_func=LOG.warning) raise InvalidGoalStateXMLException(msg) self.certificates_xml = None @@ -513,9 +518,9 @@ class GoalStateHealthReporter: try: self._post_health_report(document=document) except Exception as e: - msg = "exception while reporting ready: %s" % e - LOG.error(msg) - report_diagnostic_event(msg) + report_diagnostic_event( + "exception while reporting ready: %s" % e, + logger_func=LOG.error) raise LOG.info('Reported ready to Azure fabric.') @@ -698,39 +703,48 @@ class WALinuxAgentShim: value = dhcp245 LOG.debug("Using Azure Endpoint from dhcp options") if value is None: - report_diagnostic_event("No Azure endpoint from dhcp options") - LOG.debug('Finding Azure endpoint from networkd...') + report_diagnostic_event( + 'No Azure endpoint from dhcp options. ' + 'Finding Azure endpoint from networkd...', + logger_func=LOG.debug) value = WALinuxAgentShim._networkd_get_value_from_leases() if value is None: # Option-245 stored in /run/cloud-init/dhclient.hooks/<ifc>.json # a dhclient exit hook that calls cloud-init-dhclient-hook - report_diagnostic_event("No Azure endpoint from networkd") - LOG.debug('Finding Azure endpoint from hook json...') + report_diagnostic_event( + 'No Azure endpoint from networkd. ' + 'Finding Azure endpoint from hook json...', + logger_func=LOG.debug) dhcp_options = WALinuxAgentShim._load_dhclient_json() value = WALinuxAgentShim._get_value_from_dhcpoptions(dhcp_options) if value is None: # Fallback and check the leases file if unsuccessful - report_diagnostic_event("No Azure endpoint from dhclient logs") - LOG.debug("Unable to find endpoint in dhclient logs. " - " Falling back to check lease files") + report_diagnostic_event( + 'No Azure endpoint from dhclient logs. ' + 'Unable to find endpoint in dhclient logs. ' + 'Falling back to check lease files', + logger_func=LOG.debug) if fallback_lease_file is None: - LOG.warning("No fallback lease file was specified.") + report_diagnostic_event( + 'No fallback lease file was specified.', + logger_func=LOG.warning) value = None else: - LOG.debug("Looking for endpoint in lease file %s", - fallback_lease_file) + report_diagnostic_event( + 'Looking for endpoint in lease file %s' + % fallback_lease_file, logger_func=LOG.debug) value = WALinuxAgentShim._get_value_from_leases_file( fallback_lease_file) if value is None: - msg = "No lease found; using default endpoint" - report_diagnostic_event(msg) - LOG.warning(msg) value = DEFAULT_WIRESERVER_ENDPOINT + report_diagnostic_event( + 'No lease found; using default endpoint: %s' % value, + logger_func=LOG.warning) endpoint_ip_address = WALinuxAgentShim.get_ip_from_lease_value(value) - msg = 'Azure endpoint found at %s' % endpoint_ip_address - report_diagnostic_event(msg) - LOG.debug(msg) + report_diagnostic_event( + 'Azure endpoint found at %s' % endpoint_ip_address, + logger_func=LOG.debug) return endpoint_ip_address @azure_ds_telemetry_reporter @@ -795,9 +809,9 @@ class WALinuxAgentShim: parent=azure_ds_reporter): response = self.azure_endpoint_client.get(url) except Exception as e: - msg = 'failed to register with Azure: %s' % e - LOG.warning(msg) - report_diagnostic_event(msg) + report_diagnostic_event( + 'failed to register with Azure and fetch GoalState XML: %s' + % e, logger_func=LOG.warning) raise LOG.debug('Successfully fetched GoalState XML.') return response.contents @@ -819,16 +833,15 @@ class WALinuxAgentShim: need_certificate ) except Exception as e: - msg = 'Error processing GoalState XML: %s' % e - LOG.warning(msg) - report_diagnostic_event(msg) + report_diagnostic_event( + 'Error processing GoalState XML: %s' % e, + logger_func=LOG.warning) raise msg = ', '.join([ 'GoalState XML container id: %s' % goal_state.container_id, 'GoalState XML instance id: %s' % goal_state.instance_id, 'GoalState XML incarnation: %s' % goal_state.incarnation]) - LOG.debug(msg) - report_diagnostic_event(msg) + report_diagnostic_event(msg, logger_func=LOG.debug) return goal_state @azure_ds_telemetry_reporter @@ -910,8 +923,10 @@ def get_metadata_from_fabric(fallback_lease_file=None, dhcp_opts=None, def dhcp_log_cb(out, err): - report_diagnostic_event("dhclient output stream: %s" % out) - report_diagnostic_event("dhclient error stream: %s" % err) + report_diagnostic_event( + "dhclient output stream: %s" % out, logger_func=LOG.debug) + report_diagnostic_event( + "dhclient error stream: %s" % err, logger_func=LOG.debug) class EphemeralDHCPv4WithReporting: diff --git a/tests/unittests/test_reporting_hyperv.py b/tests/unittests/test_reporting_hyperv.py index 47ede670..3f63a60e 100644 --- a/tests/unittests/test_reporting_hyperv.py +++ b/tests/unittests/test_reporting_hyperv.py @@ -188,18 +188,34 @@ class TextKvpReporter(CiTestCase): if not re.search("variant=" + pattern, evt_msg): raise AssertionError("missing distro variant string") - def test_report_diagnostic_event(self): + def test_report_diagnostic_event_without_logger_func(self): reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path) + diagnostic_msg = "test_diagnostic" + reporter.publish_event( + azure.report_diagnostic_event(diagnostic_msg)) + reporter.q.join() + kvps = list(reporter._iterate_kvps(0)) + self.assertEqual(1, len(kvps)) + evt_msg = kvps[0]['value'] + + if diagnostic_msg not in evt_msg: + raise AssertionError("missing expected diagnostic message") + def test_report_diagnostic_event_with_logger_func(self): + reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path) + logger_func = mock.MagicMock() + diagnostic_msg = "test_diagnostic" reporter.publish_event( - azure.report_diagnostic_event("test_diagnostic")) + azure.report_diagnostic_event(diagnostic_msg, + logger_func=logger_func)) reporter.q.join() kvps = list(reporter._iterate_kvps(0)) self.assertEqual(1, len(kvps)) evt_msg = kvps[0]['value'] - if "test_diagnostic" not in evt_msg: + if diagnostic_msg not in evt_msg: raise AssertionError("missing expected diagnostic message") + logger_func.assert_called_once_with(diagnostic_msg) def test_report_compressed_event(self): reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path) |