diff options
| -rwxr-xr-x | cloudinit/sources/DataSourceAzure.py | 120 | ||||
| -rwxr-xr-x | cloudinit/sources/helpers/azure.py | 99 | ||||
| -rw-r--r-- | tests/unittests/test_reporting_hyperv.py | 22 | 
3 files changed, 142 insertions, 99 deletions
| diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index fc32f8b1..8858fbd5 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -299,9 +299,9 @@ def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'):      try:          set_hostname(temp_hostname, hostname_command)      except Exception as e: -        msg = 'Failed setting temporary hostname: %s' % e -        report_diagnostic_event(msg) -        LOG.warning(msg) +        report_diagnostic_event( +            'Failed setting temporary hostname: %s' % e, +            logger_func=LOG.warning)          yield None          return      try: @@ -356,7 +356,9 @@ class DataSourceAzure(sources.DataSource):                                                     cfg=cfg,                                                     prev_hostname=previous_hn)                  except Exception as e: -                    LOG.warning("Failed publishing hostname: %s", e) +                    report_diagnostic_event( +                        "Failed publishing hostname: %s" % e, +                        logger_func=LOG.warning)                      util.logexc(LOG, "handling set_hostname failed")          return False @@ -454,24 +456,23 @@ class DataSourceAzure(sources.DataSource):              except NonAzureDataSource:                  report_diagnostic_event( -                    "Did not find Azure data source in %s" % cdev) +                    "Did not find Azure data source in %s" % cdev, +                    logger_func=LOG.debug)                  continue              except BrokenAzureDataSource as exc:                  msg = 'BrokenAzureDataSource: %s' % exc -                report_diagnostic_event(msg) +                report_diagnostic_event(msg, logger_func=LOG.error)                  raise sources.InvalidMetaDataException(msg)              except util.MountFailedError: -                msg = '%s was not mountable' % cdev -                report_diagnostic_event(msg) -                LOG.warning(msg) +                report_diagnostic_event( +                    '%s was not mountable' % cdev, logger_func=LOG.warning)                  continue              perform_reprovision = reprovision or self._should_reprovision(ret)              if perform_reprovision:                  if util.is_FreeBSD():                      msg = "Free BSD is not supported for PPS VMs" -                    LOG.error(msg) -                    report_diagnostic_event(msg) +                    report_diagnostic_event(msg, logger_func=LOG.error)                      raise sources.InvalidMetaDataException(msg)                  ret = self._reprovision()              imds_md = get_metadata_from_imds( @@ -486,16 +487,18 @@ class DataSourceAzure(sources.DataSource):                  'userdata_raw': userdata_raw})              found = cdev -            LOG.debug("found datasource in %s", cdev) +            report_diagnostic_event( +                'found datasource in %s' % cdev, logger_func=LOG.debug)              break          if not found:              msg = 'No Azure metadata found' -            report_diagnostic_event(msg) +            report_diagnostic_event(msg, logger_func=LOG.error)              raise sources.InvalidMetaDataException(msg)          if found == ddir: -            LOG.debug("using files cached in %s", ddir) +            report_diagnostic_event( +                "using files cached in %s" % ddir, logger_func=LOG.debug)          seed = _get_random_seed()          if seed: @@ -516,7 +519,8 @@ class DataSourceAzure(sources.DataSource):                          self._report_ready(lease=lease)                  except Exception as e:                      report_diagnostic_event( -                        "exception while reporting ready: %s" % e) +                        "exception while reporting ready: %s" % e, +                        logger_func=LOG.error)                      raise          return crawled_data @@ -605,14 +609,14 @@ class DataSourceAzure(sources.DataSource):          except KeyError:              log_msg = 'Unable to get keys from IMDS, falling back to OVF'              LOG.debug(log_msg) -            report_diagnostic_event(log_msg) +            report_diagnostic_event(log_msg, logger_func=LOG.debug)              try:                  ssh_keys = self.metadata['public-keys']                  LOG.debug('Retrieved keys from OVF')              except KeyError:                  log_msg = 'No keys available from OVF'                  LOG.debug(log_msg) -                report_diagnostic_event(log_msg) +                report_diagnostic_event(log_msg, logger_func=LOG.debug)          return ssh_keys @@ -666,16 +670,14 @@ class DataSourceAzure(sources.DataSource):                      if self.imds_poll_counter == self.imds_logging_threshold:                          # Reducing the logging frequency as we are polling IMDS                          self.imds_logging_threshold *= 2 -                        LOG.debug("Call to IMDS with arguments %s failed " -                                  "with status code %s after %s retries", -                                  msg, exception.code, self.imds_poll_counter)                          LOG.debug("Backing off logging threshold for the same "                                    "exception to %d",                                    self.imds_logging_threshold)                          report_diagnostic_event("poll IMDS with %s failed. "                                                  "Exception: %s and code: %s" %                                                  (msg, exception.cause, -                                                 exception.code)) +                                                 exception.code), +                                                logger_func=LOG.debug)                      self.imds_poll_counter += 1                      return True                  else: @@ -684,12 +686,15 @@ class DataSourceAzure(sources.DataSource):                      report_diagnostic_event("poll IMDS with %s failed. "                                              "Exception: %s and code: %s" %                                              (msg, exception.cause, -                                             exception.code)) +                                             exception.code), +                                            logger_func=LOG.warning)                      return False -                LOG.debug("poll IMDS failed with an unexpected exception: %s", -                          exception) -                return False +            report_diagnostic_event( +                "poll IMDS failed with an " +                "unexpected exception: %s" % exception, +                logger_func=LOG.warning) +            return False          LOG.debug("Wait for vnetswitch to happen")          while True: @@ -709,8 +714,9 @@ class DataSourceAzure(sources.DataSource):                      try:                          nl_sock = netlink.create_bound_netlink_socket()                      except netlink.NetlinkCreateSocketError as e: -                        report_diagnostic_event(e) -                        LOG.warning(e) +                        report_diagnostic_event( +                            'Failed to create bound netlink socket: %s' % e, +                            logger_func=LOG.warning)                          self._ephemeral_dhcp_ctx.clean_network()                          break @@ -729,9 +735,10 @@ class DataSourceAzure(sources.DataSource):                          try:                              netlink.wait_for_media_disconnect_connect(                                  nl_sock, lease['interface']) -                        except AssertionError as error: -                            report_diagnostic_event(error) -                            LOG.error(error) +                        except AssertionError as e: +                            report_diagnostic_event( +                                'Error while waiting for vnet switch: %s' % e, +                                logger_func=LOG.error)                              break                      vnet_switched = True @@ -757,9 +764,11 @@ class DataSourceAzure(sources.DataSource):          if vnet_switched:              report_diagnostic_event("attempted dhcp %d times after reuse" % -                                    dhcp_attempts) +                                    dhcp_attempts, +                                    logger_func=LOG.debug)              report_diagnostic_event("polled imds %d times after reuse" % -                                    self.imds_poll_counter) +                                    self.imds_poll_counter, +                                    logger_func=LOG.debug)          return return_val @@ -768,10 +777,10 @@ class DataSourceAzure(sources.DataSource):          """Tells the fabric provisioning has completed """          try:              get_metadata_from_fabric(None, lease['unknown-245']) -        except Exception: -            LOG.warning( -                "Error communicating with Azure fabric; You may experience." -                "connectivity issues.", exc_info=True) +        except Exception as e: +            report_diagnostic_event( +                "Error communicating with Azure fabric; You may experience " +                "connectivity issues: %s" % e, logger_func=LOG.warning)      def _should_reprovision(self, ret):          """Whether or not we should poll IMDS for reprovisioning data. @@ -849,10 +858,7 @@ class DataSourceAzure(sources.DataSource):          except Exception as e:              report_diagnostic_event(                  "Error communicating with Azure fabric; You may experience " -                "connectivity issues: %s" % e) -            LOG.warning( -                "Error communicating with Azure fabric; You may experience " -                "connectivity issues.", exc_info=True) +                "connectivity issues: %s" % e, logger_func=LOG.warning)              return False          util.del_file(REPORTED_READY_MARKER_FILE) @@ -1017,9 +1023,10 @@ def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,                                        log_pre="Azure ephemeral disk: ")          if missing: -            LOG.warning("ephemeral device '%s' did" -                        " not appear after %d seconds.", -                        devpath, maxwait) +            report_diagnostic_event( +                "ephemeral device '%s' did not appear after %d seconds." % +                (devpath, maxwait), +                logger_func=LOG.warning)              return      result = False @@ -1104,7 +1111,9 @@ def pubkeys_from_crt_files(flist):              errors.append(fname)      if errors: -        LOG.warning("failed to convert the crt files to pubkey: %s", errors) +        report_diagnostic_event( +            "failed to convert the crt files to pubkey: %s" % errors, +            logger_func=LOG.warning)      return pubkeys @@ -1216,7 +1225,7 @@ def read_azure_ovf(contents):          dom = minidom.parseString(contents)      except Exception as e:          error_str = "Invalid ovf-env.xml: %s" % e -        report_diagnostic_event(error_str) +        report_diagnostic_event(error_str, logger_func=LOG.warning)          raise BrokenAzureDataSource(error_str) from e      results = find_child(dom.documentElement, @@ -1523,7 +1532,9 @@ def get_metadata_from_imds(fallback_nic, retries):                      azure_ds_reporter, fallback_nic):                  return util.log_time(**kwargs)          except Exception as e: -            report_diagnostic_event("exception while getting metadata: %s" % e) +            report_diagnostic_event( +                "exception while getting metadata: %s" % e, +                logger_func=LOG.warning)              raise @@ -1537,9 +1548,10 @@ def _get_metadata_from_imds(retries):              url, timeout=IMDS_TIMEOUT_IN_SECONDS, headers=headers,              retries=retries, exception_cb=retry_on_url_exc)      except Exception as e: -        msg = 'Ignoring IMDS instance metadata: %s' % e -        report_diagnostic_event(msg) -        LOG.debug(msg) +        report_diagnostic_event( +            'Ignoring IMDS instance metadata. ' +            'Get metadata from IMDS failed: %s' % e, +            logger_func=LOG.warning)          return {}      try:          from json.decoder import JSONDecodeError @@ -1550,9 +1562,10 @@ def _get_metadata_from_imds(retries):      try:          return util.load_json(str(response))      except json_decode_error as e: -        report_diagnostic_event('non-json imds response' % e) -        LOG.warning( -            'Ignoring non-json IMDS instance metadata: %s', str(response)) +        report_diagnostic_event( +            'Ignoring non-json IMDS instance metadata response: %s. ' +            'Loading non-json IMDS response failed: %s' % (str(response), e), +            logger_func=LOG.warning)      return {} @@ -1604,9 +1617,8 @@ def _is_platform_viable(seed_dir):          if asset_tag == AZURE_CHASSIS_ASSET_TAG:              return True          msg = "Non-Azure DMI asset tag '%s' discovered." % asset_tag -        LOG.debug(msg)          evt.description = msg -        report_diagnostic_event(msg) +        report_diagnostic_event(msg, logger_func=LOG.debug)          if os.path.exists(os.path.join(seed_dir, 'ovf-env.xml')):              return True          return False diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index 79445a81..560cadba 100755 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -180,12 +180,15 @@ def get_system_info():      return evt -def report_diagnostic_event(str): +def report_diagnostic_event( +        msg: str, *, logger_func=None) -> events.ReportingEvent:      """Report a diagnostic event""" +    if callable(logger_func): +        logger_func(msg)      evt = events.ReportingEvent(          DIAGNOSTIC_EVENT_TYPE, 'diagnostic message', -        str, events.DEFAULT_EVENT_ORIGIN) -    events.report_event(evt) +        msg, events.DEFAULT_EVENT_ORIGIN) +    events.report_event(evt, excluded_handler_types={"log"})      # return the event for unit testing purpose      return evt @@ -215,7 +218,8 @@ def push_log_to_kvp(file_name=CFG_BUILTIN['def_log_file']):      log_pushed_to_kvp = bool(os.path.isfile(LOG_PUSHED_TO_KVP_MARKER_FILE))      if log_pushed_to_kvp: -        report_diagnostic_event("cloud-init.log is already pushed to KVP") +        report_diagnostic_event( +            "cloud-init.log is already pushed to KVP", logger_func=LOG.debug)          return      LOG.debug("Dumping cloud-init.log file to KVP") @@ -225,13 +229,15 @@ def push_log_to_kvp(file_name=CFG_BUILTIN['def_log_file']):              seek_index = max(f.tell() - MAX_LOG_TO_KVP_LENGTH, 0)              report_diagnostic_event(                  "Dumping last {} bytes of cloud-init.log file to KVP".format( -                    f.tell() - seek_index)) +                    f.tell() - seek_index), +                logger_func=LOG.debug)              f.seek(seek_index, os.SEEK_SET)              report_compressed_event("cloud-init.log", f.read())          util.write_file(LOG_PUSHED_TO_KVP_MARKER_FILE, '')      except Exception as ex: -        report_diagnostic_event("Exception when dumping log file: %s" % -                                repr(ex)) +        report_diagnostic_event( +            "Exception when dumping log file: %s" % repr(ex), +            logger_func=LOG.warning)  @contextmanager @@ -305,9 +311,9 @@ class GoalState:          try:              self.root = ElementTree.fromstring(unparsed_xml)          except ElementTree.ParseError as e: -            msg = 'Failed to parse GoalState XML: %s' -            LOG.warning(msg, e) -            report_diagnostic_event(msg % (e,)) +            report_diagnostic_event( +                'Failed to parse GoalState XML: %s' % e, +                logger_func=LOG.warning)              raise          self.container_id = self._text_from_xpath('./Container/ContainerId') @@ -317,9 +323,8 @@ class GoalState:          for attr in ("container_id", "instance_id", "incarnation"):              if getattr(self, attr) is None: -                msg = 'Missing %s in GoalState XML' -                LOG.warning(msg, attr) -                report_diagnostic_event(msg % (attr,)) +                msg = 'Missing %s in GoalState XML' % attr +                report_diagnostic_event(msg, logger_func=LOG.warning)                  raise InvalidGoalStateXMLException(msg)          self.certificates_xml = None @@ -513,9 +518,9 @@ class GoalStateHealthReporter:          try:              self._post_health_report(document=document)          except Exception as e: -            msg = "exception while reporting ready: %s" % e -            LOG.error(msg) -            report_diagnostic_event(msg) +            report_diagnostic_event( +                "exception while reporting ready: %s" % e, +                logger_func=LOG.error)              raise          LOG.info('Reported ready to Azure fabric.') @@ -698,39 +703,48 @@ class WALinuxAgentShim:              value = dhcp245              LOG.debug("Using Azure Endpoint from dhcp options")          if value is None: -            report_diagnostic_event("No Azure endpoint from dhcp options") -            LOG.debug('Finding Azure endpoint from networkd...') +            report_diagnostic_event( +                'No Azure endpoint from dhcp options. ' +                'Finding Azure endpoint from networkd...', +                logger_func=LOG.debug)              value = WALinuxAgentShim._networkd_get_value_from_leases()          if value is None:              # Option-245 stored in /run/cloud-init/dhclient.hooks/<ifc>.json              # a dhclient exit hook that calls cloud-init-dhclient-hook -            report_diagnostic_event("No Azure endpoint from networkd") -            LOG.debug('Finding Azure endpoint from hook json...') +            report_diagnostic_event( +                'No Azure endpoint from networkd. ' +                'Finding Azure endpoint from hook json...', +                logger_func=LOG.debug)              dhcp_options = WALinuxAgentShim._load_dhclient_json()              value = WALinuxAgentShim._get_value_from_dhcpoptions(dhcp_options)          if value is None:              # Fallback and check the leases file if unsuccessful -            report_diagnostic_event("No Azure endpoint from dhclient logs") -            LOG.debug("Unable to find endpoint in dhclient logs. " -                      " Falling back to check lease files") +            report_diagnostic_event( +                'No Azure endpoint from dhclient logs. ' +                'Unable to find endpoint in dhclient logs. ' +                'Falling back to check lease files', +                logger_func=LOG.debug)              if fallback_lease_file is None: -                LOG.warning("No fallback lease file was specified.") +                report_diagnostic_event( +                    'No fallback lease file was specified.', +                    logger_func=LOG.warning)                  value = None              else: -                LOG.debug("Looking for endpoint in lease file %s", -                          fallback_lease_file) +                report_diagnostic_event( +                    'Looking for endpoint in lease file %s' +                    % fallback_lease_file, logger_func=LOG.debug)                  value = WALinuxAgentShim._get_value_from_leases_file(                      fallback_lease_file)          if value is None: -            msg = "No lease found; using default endpoint" -            report_diagnostic_event(msg) -            LOG.warning(msg)              value = DEFAULT_WIRESERVER_ENDPOINT +            report_diagnostic_event( +                'No lease found; using default endpoint: %s' % value, +                logger_func=LOG.warning)          endpoint_ip_address = WALinuxAgentShim.get_ip_from_lease_value(value) -        msg = 'Azure endpoint found at %s' % endpoint_ip_address -        report_diagnostic_event(msg) -        LOG.debug(msg) +        report_diagnostic_event( +            'Azure endpoint found at %s' % endpoint_ip_address, +            logger_func=LOG.debug)          return endpoint_ip_address      @azure_ds_telemetry_reporter @@ -795,9 +809,9 @@ class WALinuxAgentShim:                      parent=azure_ds_reporter):                  response = self.azure_endpoint_client.get(url)          except Exception as e: -            msg = 'failed to register with Azure: %s' % e -            LOG.warning(msg) -            report_diagnostic_event(msg) +            report_diagnostic_event( +                'failed to register with Azure and fetch GoalState XML: %s' +                % e, logger_func=LOG.warning)              raise          LOG.debug('Successfully fetched GoalState XML.')          return response.contents @@ -819,16 +833,15 @@ class WALinuxAgentShim:                  need_certificate              )          except Exception as e: -            msg = 'Error processing GoalState XML: %s' % e -            LOG.warning(msg) -            report_diagnostic_event(msg) +            report_diagnostic_event( +                'Error processing GoalState XML: %s' % e, +                logger_func=LOG.warning)              raise          msg = ', '.join([              'GoalState XML container id: %s' % goal_state.container_id,              'GoalState XML instance id: %s' % goal_state.instance_id,              'GoalState XML incarnation: %s' % goal_state.incarnation]) -        LOG.debug(msg) -        report_diagnostic_event(msg) +        report_diagnostic_event(msg, logger_func=LOG.debug)          return goal_state      @azure_ds_telemetry_reporter @@ -910,8 +923,10 @@ def get_metadata_from_fabric(fallback_lease_file=None, dhcp_opts=None,  def dhcp_log_cb(out, err): -    report_diagnostic_event("dhclient output stream: %s" % out) -    report_diagnostic_event("dhclient error stream: %s" % err) +    report_diagnostic_event( +        "dhclient output stream: %s" % out, logger_func=LOG.debug) +    report_diagnostic_event( +        "dhclient error stream: %s" % err, logger_func=LOG.debug)  class EphemeralDHCPv4WithReporting: diff --git a/tests/unittests/test_reporting_hyperv.py b/tests/unittests/test_reporting_hyperv.py index 47ede670..3f63a60e 100644 --- a/tests/unittests/test_reporting_hyperv.py +++ b/tests/unittests/test_reporting_hyperv.py @@ -188,18 +188,34 @@ class TextKvpReporter(CiTestCase):          if not re.search("variant=" + pattern, evt_msg):              raise AssertionError("missing distro variant string") -    def test_report_diagnostic_event(self): +    def test_report_diagnostic_event_without_logger_func(self):          reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path) +        diagnostic_msg = "test_diagnostic" +        reporter.publish_event( +            azure.report_diagnostic_event(diagnostic_msg)) +        reporter.q.join() +        kvps = list(reporter._iterate_kvps(0)) +        self.assertEqual(1, len(kvps)) +        evt_msg = kvps[0]['value'] + +        if diagnostic_msg not in evt_msg: +            raise AssertionError("missing expected diagnostic message") +    def test_report_diagnostic_event_with_logger_func(self): +        reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path) +        logger_func = mock.MagicMock() +        diagnostic_msg = "test_diagnostic"          reporter.publish_event( -            azure.report_diagnostic_event("test_diagnostic")) +            azure.report_diagnostic_event(diagnostic_msg, +                                          logger_func=logger_func))          reporter.q.join()          kvps = list(reporter._iterate_kvps(0))          self.assertEqual(1, len(kvps))          evt_msg = kvps[0]['value'] -        if "test_diagnostic" not in evt_msg: +        if diagnostic_msg not in evt_msg:              raise AssertionError("missing expected diagnostic message") +        logger_func.assert_called_once_with(diagnostic_msg)      def test_report_compressed_event(self):          reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path) | 
