summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x  cloudinit/sources/DataSourceAzure.py      120
-rwxr-xr-x  cloudinit/sources/helpers/azure.py         99
-rw-r--r--  tests/unittests/test_reporting_hyperv.py   22
3 files changed, 142 insertions, 99 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index fc32f8b1..8858fbd5 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -299,9 +299,9 @@ def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'):
try:
set_hostname(temp_hostname, hostname_command)
except Exception as e:
- msg = 'Failed setting temporary hostname: %s' % e
- report_diagnostic_event(msg)
- LOG.warning(msg)
+ report_diagnostic_event(
+ 'Failed setting temporary hostname: %s' % e,
+ logger_func=LOG.warning)
yield None
return
try:
@@ -356,7 +356,9 @@ class DataSourceAzure(sources.DataSource):
cfg=cfg,
prev_hostname=previous_hn)
except Exception as e:
- LOG.warning("Failed publishing hostname: %s", e)
+ report_diagnostic_event(
+ "Failed publishing hostname: %s" % e,
+ logger_func=LOG.warning)
util.logexc(LOG, "handling set_hostname failed")
return False
@@ -454,24 +456,23 @@ class DataSourceAzure(sources.DataSource):
except NonAzureDataSource:
report_diagnostic_event(
- "Did not find Azure data source in %s" % cdev)
+ "Did not find Azure data source in %s" % cdev,
+ logger_func=LOG.debug)
continue
except BrokenAzureDataSource as exc:
msg = 'BrokenAzureDataSource: %s' % exc
- report_diagnostic_event(msg)
+ report_diagnostic_event(msg, logger_func=LOG.error)
raise sources.InvalidMetaDataException(msg)
except util.MountFailedError:
- msg = '%s was not mountable' % cdev
- report_diagnostic_event(msg)
- LOG.warning(msg)
+ report_diagnostic_event(
+ '%s was not mountable' % cdev, logger_func=LOG.warning)
continue
perform_reprovision = reprovision or self._should_reprovision(ret)
if perform_reprovision:
if util.is_FreeBSD():
msg = "Free BSD is not supported for PPS VMs"
- LOG.error(msg)
- report_diagnostic_event(msg)
+ report_diagnostic_event(msg, logger_func=LOG.error)
raise sources.InvalidMetaDataException(msg)
ret = self._reprovision()
imds_md = get_metadata_from_imds(
@@ -486,16 +487,18 @@ class DataSourceAzure(sources.DataSource):
'userdata_raw': userdata_raw})
found = cdev
- LOG.debug("found datasource in %s", cdev)
+ report_diagnostic_event(
+ 'found datasource in %s' % cdev, logger_func=LOG.debug)
break
if not found:
msg = 'No Azure metadata found'
- report_diagnostic_event(msg)
+ report_diagnostic_event(msg, logger_func=LOG.error)
raise sources.InvalidMetaDataException(msg)
if found == ddir:
- LOG.debug("using files cached in %s", ddir)
+ report_diagnostic_event(
+ "using files cached in %s" % ddir, logger_func=LOG.debug)
seed = _get_random_seed()
if seed:
@@ -516,7 +519,8 @@ class DataSourceAzure(sources.DataSource):
self._report_ready(lease=lease)
except Exception as e:
report_diagnostic_event(
- "exception while reporting ready: %s" % e)
+ "exception while reporting ready: %s" % e,
+ logger_func=LOG.error)
raise
return crawled_data
@@ -605,14 +609,14 @@ class DataSourceAzure(sources.DataSource):
except KeyError:
log_msg = 'Unable to get keys from IMDS, falling back to OVF'
LOG.debug(log_msg)
- report_diagnostic_event(log_msg)
+ report_diagnostic_event(log_msg, logger_func=LOG.debug)
try:
ssh_keys = self.metadata['public-keys']
LOG.debug('Retrieved keys from OVF')
except KeyError:
log_msg = 'No keys available from OVF'
LOG.debug(log_msg)
- report_diagnostic_event(log_msg)
+ report_diagnostic_event(log_msg, logger_func=LOG.debug)
return ssh_keys
@@ -666,16 +670,14 @@ class DataSourceAzure(sources.DataSource):
if self.imds_poll_counter == self.imds_logging_threshold:
# Reducing the logging frequency as we are polling IMDS
self.imds_logging_threshold *= 2
- LOG.debug("Call to IMDS with arguments %s failed "
- "with status code %s after %s retries",
- msg, exception.code, self.imds_poll_counter)
LOG.debug("Backing off logging threshold for the same "
"exception to %d",
self.imds_logging_threshold)
report_diagnostic_event("poll IMDS with %s failed. "
"Exception: %s and code: %s" %
(msg, exception.cause,
- exception.code))
+ exception.code),
+ logger_func=LOG.debug)
self.imds_poll_counter += 1
return True
else:
@@ -684,12 +686,15 @@ class DataSourceAzure(sources.DataSource):
report_diagnostic_event("poll IMDS with %s failed. "
"Exception: %s and code: %s" %
(msg, exception.cause,
- exception.code))
+ exception.code),
+ logger_func=LOG.warning)
return False
- LOG.debug("poll IMDS failed with an unexpected exception: %s",
- exception)
- return False
+ report_diagnostic_event(
+ "poll IMDS failed with an "
+ "unexpected exception: %s" % exception,
+ logger_func=LOG.warning)
+ return False
LOG.debug("Wait for vnetswitch to happen")
while True:
@@ -709,8 +714,9 @@ class DataSourceAzure(sources.DataSource):
try:
nl_sock = netlink.create_bound_netlink_socket()
except netlink.NetlinkCreateSocketError as e:
- report_diagnostic_event(e)
- LOG.warning(e)
+ report_diagnostic_event(
+ 'Failed to create bound netlink socket: %s' % e,
+ logger_func=LOG.warning)
self._ephemeral_dhcp_ctx.clean_network()
break
@@ -729,9 +735,10 @@ class DataSourceAzure(sources.DataSource):
try:
netlink.wait_for_media_disconnect_connect(
nl_sock, lease['interface'])
- except AssertionError as error:
- report_diagnostic_event(error)
- LOG.error(error)
+ except AssertionError as e:
+ report_diagnostic_event(
+ 'Error while waiting for vnet switch: %s' % e,
+ logger_func=LOG.error)
break
vnet_switched = True
@@ -757,9 +764,11 @@ class DataSourceAzure(sources.DataSource):
if vnet_switched:
report_diagnostic_event("attempted dhcp %d times after reuse" %
- dhcp_attempts)
+ dhcp_attempts,
+ logger_func=LOG.debug)
report_diagnostic_event("polled imds %d times after reuse" %
- self.imds_poll_counter)
+ self.imds_poll_counter,
+ logger_func=LOG.debug)
return return_val
@@ -768,10 +777,10 @@ class DataSourceAzure(sources.DataSource):
"""Tells the fabric provisioning has completed """
try:
get_metadata_from_fabric(None, lease['unknown-245'])
- except Exception:
- LOG.warning(
- "Error communicating with Azure fabric; You may experience."
- "connectivity issues.", exc_info=True)
+ except Exception as e:
+ report_diagnostic_event(
+ "Error communicating with Azure fabric; You may experience "
+ "connectivity issues: %s" % e, logger_func=LOG.warning)
def _should_reprovision(self, ret):
"""Whether or not we should poll IMDS for reprovisioning data.
@@ -849,10 +858,7 @@ class DataSourceAzure(sources.DataSource):
except Exception as e:
report_diagnostic_event(
"Error communicating with Azure fabric; You may experience "
- "connectivity issues: %s" % e)
- LOG.warning(
- "Error communicating with Azure fabric; You may experience "
- "connectivity issues.", exc_info=True)
+ "connectivity issues: %s" % e, logger_func=LOG.warning)
return False
util.del_file(REPORTED_READY_MARKER_FILE)
@@ -1017,9 +1023,10 @@ def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,
log_pre="Azure ephemeral disk: ")
if missing:
- LOG.warning("ephemeral device '%s' did"
- " not appear after %d seconds.",
- devpath, maxwait)
+ report_diagnostic_event(
+ "ephemeral device '%s' did not appear after %d seconds." %
+ (devpath, maxwait),
+ logger_func=LOG.warning)
return
result = False
@@ -1104,7 +1111,9 @@ def pubkeys_from_crt_files(flist):
errors.append(fname)
if errors:
- LOG.warning("failed to convert the crt files to pubkey: %s", errors)
+ report_diagnostic_event(
+ "failed to convert the crt files to pubkey: %s" % errors,
+ logger_func=LOG.warning)
return pubkeys
@@ -1216,7 +1225,7 @@ def read_azure_ovf(contents):
dom = minidom.parseString(contents)
except Exception as e:
error_str = "Invalid ovf-env.xml: %s" % e
- report_diagnostic_event(error_str)
+ report_diagnostic_event(error_str, logger_func=LOG.warning)
raise BrokenAzureDataSource(error_str) from e
results = find_child(dom.documentElement,
@@ -1523,7 +1532,9 @@ def get_metadata_from_imds(fallback_nic, retries):
azure_ds_reporter, fallback_nic):
return util.log_time(**kwargs)
except Exception as e:
- report_diagnostic_event("exception while getting metadata: %s" % e)
+ report_diagnostic_event(
+ "exception while getting metadata: %s" % e,
+ logger_func=LOG.warning)
raise
@@ -1537,9 +1548,10 @@ def _get_metadata_from_imds(retries):
url, timeout=IMDS_TIMEOUT_IN_SECONDS, headers=headers,
retries=retries, exception_cb=retry_on_url_exc)
except Exception as e:
- msg = 'Ignoring IMDS instance metadata: %s' % e
- report_diagnostic_event(msg)
- LOG.debug(msg)
+ report_diagnostic_event(
+ 'Ignoring IMDS instance metadata. '
+ 'Get metadata from IMDS failed: %s' % e,
+ logger_func=LOG.warning)
return {}
try:
from json.decoder import JSONDecodeError
@@ -1550,9 +1562,10 @@ def _get_metadata_from_imds(retries):
try:
return util.load_json(str(response))
except json_decode_error as e:
- report_diagnostic_event('non-json imds response' % e)
- LOG.warning(
- 'Ignoring non-json IMDS instance metadata: %s', str(response))
+ report_diagnostic_event(
+ 'Ignoring non-json IMDS instance metadata response: %s. '
+ 'Loading non-json IMDS response failed: %s' % (str(response), e),
+ logger_func=LOG.warning)
return {}
@@ -1604,9 +1617,8 @@ def _is_platform_viable(seed_dir):
if asset_tag == AZURE_CHASSIS_ASSET_TAG:
return True
msg = "Non-Azure DMI asset tag '%s' discovered." % asset_tag
- LOG.debug(msg)
evt.description = msg
- report_diagnostic_event(msg)
+ report_diagnostic_event(msg, logger_func=LOG.debug)
if os.path.exists(os.path.join(seed_dir, 'ovf-env.xml')):
return True
return False
diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py
index 79445a81..560cadba 100755
--- a/cloudinit/sources/helpers/azure.py
+++ b/cloudinit/sources/helpers/azure.py
@@ -180,12 +180,15 @@ def get_system_info():
return evt
-def report_diagnostic_event(str):
+def report_diagnostic_event(
+ msg: str, *, logger_func=None) -> events.ReportingEvent:
"""Report a diagnostic event"""
+ if callable(logger_func):
+ logger_func(msg)
evt = events.ReportingEvent(
DIAGNOSTIC_EVENT_TYPE, 'diagnostic message',
- str, events.DEFAULT_EVENT_ORIGIN)
- events.report_event(evt)
+ msg, events.DEFAULT_EVENT_ORIGIN)
+ events.report_event(evt, excluded_handler_types={"log"})
# return the event for unit testing purpose
return evt
@@ -215,7 +218,8 @@ def push_log_to_kvp(file_name=CFG_BUILTIN['def_log_file']):
log_pushed_to_kvp = bool(os.path.isfile(LOG_PUSHED_TO_KVP_MARKER_FILE))
if log_pushed_to_kvp:
- report_diagnostic_event("cloud-init.log is already pushed to KVP")
+ report_diagnostic_event(
+ "cloud-init.log is already pushed to KVP", logger_func=LOG.debug)
return
LOG.debug("Dumping cloud-init.log file to KVP")
@@ -225,13 +229,15 @@ def push_log_to_kvp(file_name=CFG_BUILTIN['def_log_file']):
seek_index = max(f.tell() - MAX_LOG_TO_KVP_LENGTH, 0)
report_diagnostic_event(
"Dumping last {} bytes of cloud-init.log file to KVP".format(
- f.tell() - seek_index))
+ f.tell() - seek_index),
+ logger_func=LOG.debug)
f.seek(seek_index, os.SEEK_SET)
report_compressed_event("cloud-init.log", f.read())
util.write_file(LOG_PUSHED_TO_KVP_MARKER_FILE, '')
except Exception as ex:
- report_diagnostic_event("Exception when dumping log file: %s" %
- repr(ex))
+ report_diagnostic_event(
+ "Exception when dumping log file: %s" % repr(ex),
+ logger_func=LOG.warning)
@contextmanager
@@ -305,9 +311,9 @@ class GoalState:
try:
self.root = ElementTree.fromstring(unparsed_xml)
except ElementTree.ParseError as e:
- msg = 'Failed to parse GoalState XML: %s'
- LOG.warning(msg, e)
- report_diagnostic_event(msg % (e,))
+ report_diagnostic_event(
+ 'Failed to parse GoalState XML: %s' % e,
+ logger_func=LOG.warning)
raise
self.container_id = self._text_from_xpath('./Container/ContainerId')
@@ -317,9 +323,8 @@ class GoalState:
for attr in ("container_id", "instance_id", "incarnation"):
if getattr(self, attr) is None:
- msg = 'Missing %s in GoalState XML'
- LOG.warning(msg, attr)
- report_diagnostic_event(msg % (attr,))
+ msg = 'Missing %s in GoalState XML' % attr
+ report_diagnostic_event(msg, logger_func=LOG.warning)
raise InvalidGoalStateXMLException(msg)
self.certificates_xml = None
@@ -513,9 +518,9 @@ class GoalStateHealthReporter:
try:
self._post_health_report(document=document)
except Exception as e:
- msg = "exception while reporting ready: %s" % e
- LOG.error(msg)
- report_diagnostic_event(msg)
+ report_diagnostic_event(
+ "exception while reporting ready: %s" % e,
+ logger_func=LOG.error)
raise
LOG.info('Reported ready to Azure fabric.')
@@ -698,39 +703,48 @@ class WALinuxAgentShim:
value = dhcp245
LOG.debug("Using Azure Endpoint from dhcp options")
if value is None:
- report_diagnostic_event("No Azure endpoint from dhcp options")
- LOG.debug('Finding Azure endpoint from networkd...')
+ report_diagnostic_event(
+ 'No Azure endpoint from dhcp options. '
+ 'Finding Azure endpoint from networkd...',
+ logger_func=LOG.debug)
value = WALinuxAgentShim._networkd_get_value_from_leases()
if value is None:
# Option-245 stored in /run/cloud-init/dhclient.hooks/<ifc>.json
# a dhclient exit hook that calls cloud-init-dhclient-hook
- report_diagnostic_event("No Azure endpoint from networkd")
- LOG.debug('Finding Azure endpoint from hook json...')
+ report_diagnostic_event(
+ 'No Azure endpoint from networkd. '
+ 'Finding Azure endpoint from hook json...',
+ logger_func=LOG.debug)
dhcp_options = WALinuxAgentShim._load_dhclient_json()
value = WALinuxAgentShim._get_value_from_dhcpoptions(dhcp_options)
if value is None:
# Fallback and check the leases file if unsuccessful
- report_diagnostic_event("No Azure endpoint from dhclient logs")
- LOG.debug("Unable to find endpoint in dhclient logs. "
- " Falling back to check lease files")
+ report_diagnostic_event(
+ 'No Azure endpoint from dhclient logs. '
+ 'Unable to find endpoint in dhclient logs. '
+ 'Falling back to check lease files',
+ logger_func=LOG.debug)
if fallback_lease_file is None:
- LOG.warning("No fallback lease file was specified.")
+ report_diagnostic_event(
+ 'No fallback lease file was specified.',
+ logger_func=LOG.warning)
value = None
else:
- LOG.debug("Looking for endpoint in lease file %s",
- fallback_lease_file)
+ report_diagnostic_event(
+ 'Looking for endpoint in lease file %s'
+ % fallback_lease_file, logger_func=LOG.debug)
value = WALinuxAgentShim._get_value_from_leases_file(
fallback_lease_file)
if value is None:
- msg = "No lease found; using default endpoint"
- report_diagnostic_event(msg)
- LOG.warning(msg)
value = DEFAULT_WIRESERVER_ENDPOINT
+ report_diagnostic_event(
+ 'No lease found; using default endpoint: %s' % value,
+ logger_func=LOG.warning)
endpoint_ip_address = WALinuxAgentShim.get_ip_from_lease_value(value)
- msg = 'Azure endpoint found at %s' % endpoint_ip_address
- report_diagnostic_event(msg)
- LOG.debug(msg)
+ report_diagnostic_event(
+ 'Azure endpoint found at %s' % endpoint_ip_address,
+ logger_func=LOG.debug)
return endpoint_ip_address
@azure_ds_telemetry_reporter
@@ -795,9 +809,9 @@ class WALinuxAgentShim:
parent=azure_ds_reporter):
response = self.azure_endpoint_client.get(url)
except Exception as e:
- msg = 'failed to register with Azure: %s' % e
- LOG.warning(msg)
- report_diagnostic_event(msg)
+ report_diagnostic_event(
+ 'failed to register with Azure and fetch GoalState XML: %s'
+ % e, logger_func=LOG.warning)
raise
LOG.debug('Successfully fetched GoalState XML.')
return response.contents
@@ -819,16 +833,15 @@ class WALinuxAgentShim:
need_certificate
)
except Exception as e:
- msg = 'Error processing GoalState XML: %s' % e
- LOG.warning(msg)
- report_diagnostic_event(msg)
+ report_diagnostic_event(
+ 'Error processing GoalState XML: %s' % e,
+ logger_func=LOG.warning)
raise
msg = ', '.join([
'GoalState XML container id: %s' % goal_state.container_id,
'GoalState XML instance id: %s' % goal_state.instance_id,
'GoalState XML incarnation: %s' % goal_state.incarnation])
- LOG.debug(msg)
- report_diagnostic_event(msg)
+ report_diagnostic_event(msg, logger_func=LOG.debug)
return goal_state
@azure_ds_telemetry_reporter
@@ -910,8 +923,10 @@ def get_metadata_from_fabric(fallback_lease_file=None, dhcp_opts=None,
def dhcp_log_cb(out, err):
- report_diagnostic_event("dhclient output stream: %s" % out)
- report_diagnostic_event("dhclient error stream: %s" % err)
+ report_diagnostic_event(
+ "dhclient output stream: %s" % out, logger_func=LOG.debug)
+ report_diagnostic_event(
+ "dhclient error stream: %s" % err, logger_func=LOG.debug)
class EphemeralDHCPv4WithReporting:
diff --git a/tests/unittests/test_reporting_hyperv.py b/tests/unittests/test_reporting_hyperv.py
index 47ede670..3f63a60e 100644
--- a/tests/unittests/test_reporting_hyperv.py
+++ b/tests/unittests/test_reporting_hyperv.py
@@ -188,18 +188,34 @@ class TextKvpReporter(CiTestCase):
if not re.search("variant=" + pattern, evt_msg):
raise AssertionError("missing distro variant string")
- def test_report_diagnostic_event(self):
+ def test_report_diagnostic_event_without_logger_func(self):
reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path)
+ diagnostic_msg = "test_diagnostic"
+ reporter.publish_event(
+ azure.report_diagnostic_event(diagnostic_msg))
+ reporter.q.join()
+ kvps = list(reporter._iterate_kvps(0))
+ self.assertEqual(1, len(kvps))
+ evt_msg = kvps[0]['value']
+
+ if diagnostic_msg not in evt_msg:
+ raise AssertionError("missing expected diagnostic message")
+ def test_report_diagnostic_event_with_logger_func(self):
+ reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path)
+ logger_func = mock.MagicMock()
+ diagnostic_msg = "test_diagnostic"
reporter.publish_event(
- azure.report_diagnostic_event("test_diagnostic"))
+ azure.report_diagnostic_event(diagnostic_msg,
+ logger_func=logger_func))
reporter.q.join()
kvps = list(reporter._iterate_kvps(0))
self.assertEqual(1, len(kvps))
evt_msg = kvps[0]['value']
- if "test_diagnostic" not in evt_msg:
+ if diagnostic_msg not in evt_msg:
raise AssertionError("missing expected diagnostic message")
+ logger_func.assert_called_once_with(diagnostic_msg)
def test_report_compressed_event(self):
reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path)