summaryrefslogtreecommitdiff
path: root/cloudinit/sources
diff options
context:
space:
mode:
Diffstat (limited to 'cloudinit/sources')
-rw-r--r--cloudinit/sources/DataSourceAliYun.py4
-rw-r--r--cloudinit/sources/DataSourceAltCloud.py3
-rwxr-xr-xcloudinit/sources/DataSourceAzure.py937
-rw-r--r--cloudinit/sources/DataSourceBigstep.py3
-rw-r--r--cloudinit/sources/DataSourceCloudSigma.py4
-rw-r--r--cloudinit/sources/DataSourceEc2.py9
-rw-r--r--cloudinit/sources/DataSourceExoscale.py3
-rw-r--r--cloudinit/sources/DataSourceGCE.py5
-rw-r--r--cloudinit/sources/DataSourceHetzner.py34
-rw-r--r--cloudinit/sources/DataSourceNoCloud.py6
-rw-r--r--cloudinit/sources/DataSourceNone.py3
-rw-r--r--cloudinit/sources/DataSourceOVF.py9
-rw-r--r--cloudinit/sources/DataSourceOpenNebula.py3
-rw-r--r--cloudinit/sources/DataSourceOpenStack.py8
-rw-r--r--cloudinit/sources/DataSourceOracle.py5
-rw-r--r--cloudinit/sources/DataSourceRbxCloud.py16
-rw-r--r--cloudinit/sources/DataSourceScaleway.py3
-rw-r--r--cloudinit/sources/DataSourceSmartOS.py3
-rw-r--r--cloudinit/sources/__init__.py3
-rwxr-xr-xcloudinit/sources/helpers/azure.py354
-rw-r--r--cloudinit/sources/helpers/digitalocean.py5
-rw-r--r--cloudinit/sources/helpers/hetzner.py3
-rw-r--r--cloudinit/sources/helpers/netlink.py102
-rw-r--r--cloudinit/sources/helpers/openstack.py8
-rw-r--r--cloudinit/sources/helpers/tests/test_netlink.py74
-rw-r--r--cloudinit/sources/helpers/vmware/imc/config_nic.py1
-rw-r--r--cloudinit/sources/tests/test_oracle.py6
27 files changed, 1308 insertions, 306 deletions
diff --git a/cloudinit/sources/DataSourceAliYun.py b/cloudinit/sources/DataSourceAliYun.py
index 45cc9f00..09052873 100644
--- a/cloudinit/sources/DataSourceAliYun.py
+++ b/cloudinit/sources/DataSourceAliYun.py
@@ -1,8 +1,8 @@
# This file is part of cloud-init. See LICENSE file for license information.
+from cloudinit import dmi
from cloudinit import sources
from cloudinit.sources import DataSourceEc2 as EC2
-from cloudinit import util
ALIYUN_PRODUCT = "Alibaba Cloud ECS"
@@ -30,7 +30,7 @@ class DataSourceAliYun(EC2.DataSourceEc2):
def _is_aliyun():
- return util.read_dmi_data('system-product-name') == ALIYUN_PRODUCT
+ return dmi.read_dmi_data('system-product-name') == ALIYUN_PRODUCT
def parse_public_keys(public_keys):
diff --git a/cloudinit/sources/DataSourceAltCloud.py b/cloudinit/sources/DataSourceAltCloud.py
index ac3ecc3d..cd93412a 100644
--- a/cloudinit/sources/DataSourceAltCloud.py
+++ b/cloudinit/sources/DataSourceAltCloud.py
@@ -16,6 +16,7 @@ import errno
import os
import os.path
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit import sources
from cloudinit import subp
@@ -109,7 +110,7 @@ class DataSourceAltCloud(sources.DataSource):
CLOUD_INFO_FILE)
return 'UNKNOWN'
return cloud_type
- system_name = util.read_dmi_data("system-product-name")
+ system_name = dmi.read_dmi_data("system-product-name")
if not system_name:
return 'UNKNOWN'
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index f3c6452b..04ff2131 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -12,9 +12,12 @@ import os
import os.path
import re
from time import time
+from time import sleep
from xml.dom import minidom
import xml.etree.ElementTree as ET
+from enum import Enum
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit import net
from cloudinit.event import EventType
@@ -28,6 +31,7 @@ from cloudinit import util
from cloudinit.reporting import events
from cloudinit.sources.helpers.azure import (
+ DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE,
azure_ds_reporter,
azure_ds_telemetry_reporter,
get_metadata_from_fabric,
@@ -37,7 +41,8 @@ from cloudinit.sources.helpers.azure import (
EphemeralDHCPv4WithReporting,
is_byte_swapped,
dhcp_log_cb,
- push_log_to_kvp)
+ push_log_to_kvp,
+ report_failure_to_fabric)
LOG = logging.getLogger(__name__)
@@ -64,13 +69,27 @@ DEFAULT_FS = 'ext4'
# DMI chassis-asset-tag is set static for all azure instances
AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77'
REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds"
+REPROVISION_NIC_ATTACH_MARKER_FILE = "/var/lib/cloud/data/wait_for_nic_attach"
+REPROVISION_NIC_DETACHED_MARKER_FILE = "/var/lib/cloud/data/nic_detached"
REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready"
AGENT_SEED_DIR = '/var/lib/waagent'
+
# In the event where the IMDS primary server is not
# available, it takes 1s to fallback to the secondary one
IMDS_TIMEOUT_IN_SECONDS = 2
IMDS_URL = "http://169.254.169.254/metadata/"
+IMDS_VER = "2019-06-01"
+IMDS_VER_PARAM = "api-version={}".format(IMDS_VER)
+
+
+class metadata_type(Enum):
+ compute = "{}instance?{}".format(IMDS_URL, IMDS_VER_PARAM)
+ network = "{}instance/network?{}".format(IMDS_URL,
+ IMDS_VER_PARAM)
+ reprovisiondata = "{}reprovisiondata?{}".format(IMDS_URL,
+ IMDS_VER_PARAM)
+
PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0"
@@ -83,6 +102,25 @@ UBUNTU_EXTENDED_NETWORK_SCRIPTS = [
'/run/network/interfaces.ephemeral.d',
]
+# This list is used to blacklist devices that will be considered
+# for renaming or fallback interfaces.
+#
+# On Azure network devices using these drivers are automatically
+# configured by the platform and should not be configured by
+# cloud-init's network configuration.
+#
+# Note:
+# Azure Dv4 and Ev4 series VMs always have mlx5 hardware.
+# https://docs.microsoft.com/en-us/azure/virtual-machines/dv4-dsv4-series
+# https://docs.microsoft.com/en-us/azure/virtual-machines/ev4-esv4-series
+# Earlier D and E series VMs (such as Dv2, Dv3, and Ev3 series VMs)
+# can have either mlx4 or mlx5 hardware, with the older series VMs
+# having a higher chance of coming with mlx4 hardware.
+# https://docs.microsoft.com/en-us/azure/virtual-machines/dv2-dsv2-series
+# https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series
+# https://docs.microsoft.com/en-us/azure/virtual-machines/ev3-esv3-series
+BLACKLIST_DRIVERS = ['mlx4_core', 'mlx5_core']
+
def find_storvscid_from_sysctl_pnpinfo(sysctl_out, deviceid):
# extract the 'X' from dev.storvsc.X. if deviceid matches
@@ -280,9 +318,9 @@ def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'):
try:
set_hostname(temp_hostname, hostname_command)
except Exception as e:
- msg = 'Failed setting temporary hostname: %s' % e
- report_diagnostic_event(msg)
- LOG.warning(msg)
+ report_diagnostic_event(
+ 'Failed setting temporary hostname: %s' % e,
+ logger_func=LOG.warning)
yield None
return
try:
@@ -337,7 +375,9 @@ class DataSourceAzure(sources.DataSource):
cfg=cfg,
prev_hostname=previous_hn)
except Exception as e:
- LOG.warning("Failed publishing hostname: %s", e)
+ report_diagnostic_event(
+ "Failed publishing hostname: %s" % e,
+ logger_func=LOG.warning)
util.logexc(LOG, "handling set_hostname failed")
return False
@@ -410,20 +450,39 @@ class DataSourceAzure(sources.DataSource):
# need to look in the datadir and consider that valid
ddir = self.ds_cfg['data_dir']
+ # The order in which the candidates are inserted matters here, because
+ # it determines the value of ret. More specifically, the first one in
+ # the candidate list determines the path to take in order to get the
+ # metadata we need.
candidates = [self.seed_dir]
if os.path.isfile(REPROVISION_MARKER_FILE):
candidates.insert(0, "IMDS")
+ report_diagnostic_event("Reprovision marker file already present "
+ "before crawling Azure metadata: %s" %
+ REPROVISION_MARKER_FILE,
+ logger_func=LOG.debug)
+ elif os.path.isfile(REPROVISION_NIC_ATTACH_MARKER_FILE):
+ candidates.insert(0, "NIC_ATTACH_MARKER_PRESENT")
+ report_diagnostic_event("Reprovision nic attach marker file "
+ "already present before crawling Azure "
+ "metadata: %s" %
+ REPROVISION_NIC_ATTACH_MARKER_FILE,
+ logger_func=LOG.debug)
candidates.extend(list_possible_azure_ds_devs())
if ddir:
candidates.append(ddir)
found = None
reprovision = False
+ reprovision_after_nic_attach = False
for cdev in candidates:
try:
if cdev == "IMDS":
ret = None
reprovision = True
+ elif cdev == "NIC_ATTACH_MARKER_PRESENT":
+ ret = None
+ reprovision_after_nic_attach = True
elif cdev.startswith("/dev/"):
if util.is_FreeBSD():
ret = util.mount_cb(cdev, load_azure_ds_dir,
@@ -435,26 +494,32 @@ class DataSourceAzure(sources.DataSource):
except NonAzureDataSource:
report_diagnostic_event(
- "Did not find Azure data source in %s" % cdev)
+ "Did not find Azure data source in %s" % cdev,
+ logger_func=LOG.debug)
continue
except BrokenAzureDataSource as exc:
msg = 'BrokenAzureDataSource: %s' % exc
- report_diagnostic_event(msg)
+ report_diagnostic_event(msg, logger_func=LOG.error)
raise sources.InvalidMetaDataException(msg)
except util.MountFailedError:
- msg = '%s was not mountable' % cdev
- report_diagnostic_event(msg)
- LOG.warning(msg)
+ report_diagnostic_event(
+ '%s was not mountable' % cdev, logger_func=LOG.warning)
continue
perform_reprovision = reprovision or self._should_reprovision(ret)
- if perform_reprovision:
+ perform_reprovision_after_nic_attach = (
+ reprovision_after_nic_attach or
+ self._should_reprovision_after_nic_attach(ret))
+
+ if perform_reprovision or perform_reprovision_after_nic_attach:
if util.is_FreeBSD():
msg = "Free BSD is not supported for PPS VMs"
- LOG.error(msg)
- report_diagnostic_event(msg)
+ report_diagnostic_event(msg, logger_func=LOG.error)
raise sources.InvalidMetaDataException(msg)
+ if perform_reprovision_after_nic_attach:
+ self._wait_for_all_nics_ready()
ret = self._reprovision()
+
imds_md = get_metadata_from_imds(
self.fallback_interface, retries=10)
(md, userdata_raw, cfg, files) = ret
@@ -467,26 +532,29 @@ class DataSourceAzure(sources.DataSource):
'userdata_raw': userdata_raw})
found = cdev
- LOG.debug("found datasource in %s", cdev)
+ report_diagnostic_event(
+ 'found datasource in %s' % cdev, logger_func=LOG.debug)
break
if not found:
msg = 'No Azure metadata found'
- report_diagnostic_event(msg)
+ report_diagnostic_event(msg, logger_func=LOG.error)
raise sources.InvalidMetaDataException(msg)
if found == ddir:
- LOG.debug("using files cached in %s", ddir)
+ report_diagnostic_event(
+ "using files cached in %s" % ddir, logger_func=LOG.debug)
seed = _get_random_seed()
if seed:
crawled_data['metadata']['random_seed'] = seed
crawled_data['metadata']['instance-id'] = self._iid()
- if perform_reprovision:
+ if perform_reprovision or perform_reprovision_after_nic_attach:
LOG.info("Reporting ready to Azure after getting ReprovisionData")
- use_cached_ephemeral = (net.is_up(self.fallback_interface) and
- getattr(self, '_ephemeral_dhcp_ctx', None))
+ use_cached_ephemeral = (
+ self.distro.networking.is_up(self.fallback_interface) and
+ getattr(self, '_ephemeral_dhcp_ctx', None))
if use_cached_ephemeral:
self._report_ready(lease=self._ephemeral_dhcp_ctx.lease)
self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral
@@ -497,7 +565,8 @@ class DataSourceAzure(sources.DataSource):
self._report_ready(lease=lease)
except Exception as e:
report_diagnostic_event(
- "exception while reporting ready: %s" % e)
+ "exception while reporting ready: %s" % e,
+ logger_func=LOG.error)
raise
return crawled_data
@@ -529,14 +598,21 @@ class DataSourceAzure(sources.DataSource):
except Exception as e:
LOG.warning("Failed to get system information: %s", e)
+ self.distro.networking.blacklist_drivers = BLACKLIST_DRIVERS
+
try:
crawled_data = util.log_time(
logfunc=LOG.debug, msg='Crawl of metadata service',
func=self.crawl_metadata
)
- except sources.InvalidMetaDataException as e:
- LOG.warning('Could not crawl Azure metadata: %s', e)
+ except Exception as e:
+ report_diagnostic_event(
+ 'Could not crawl Azure metadata: %s' % e,
+ logger_func=LOG.error)
+ self._report_failure(
+ description=DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE)
return False
+
if (self.distro and self.distro.name == 'ubuntu' and
self.ds_cfg.get('apply_network_config')):
maybe_remove_ubuntu_network_config_scripts()
@@ -561,6 +637,38 @@ class DataSourceAzure(sources.DataSource):
def device_name_to_device(self, name):
return self.ds_cfg['disk_aliases'].get(name)
+ @azure_ds_telemetry_reporter
+ def get_public_ssh_keys(self):
+ """
+ Try to get the ssh keys from IMDS first, and if that fails
+ (i.e. IMDS is unavailable) then fallback to getting the ssh
+ keys from OVF.
+
+ The benefit to getting keys from IMDS is a large performance
+ advantage, so this is a strong preference. But we must keep
+ OVF as a second option for environments that don't have IMDS.
+ """
+ LOG.debug('Retrieving public SSH keys')
+ ssh_keys = []
+ try:
+ ssh_keys = [
+ public_key['keyData']
+ for public_key
+ in self.metadata['imds']['compute']['publicKeys']
+ ]
+ LOG.debug('Retrieved SSH keys from IMDS')
+ except KeyError:
+ log_msg = 'Unable to get keys from IMDS, falling back to OVF'
+ report_diagnostic_event(log_msg, logger_func=LOG.debug)
+ try:
+ ssh_keys = self.metadata['public-keys']
+ LOG.debug('Retrieved keys from OVF')
+ except KeyError:
+ log_msg = 'No keys available from OVF'
+ report_diagnostic_event(log_msg, logger_func=LOG.debug)
+
+ return ssh_keys
+
def get_config_obj(self):
return self.cfg
@@ -571,7 +679,7 @@ class DataSourceAzure(sources.DataSource):
def _iid(self, previous=None):
prev_iid_path = os.path.join(
self.paths.get_cpath('data'), 'instance-id')
- iid = util.read_dmi_data('system-uuid')
+ iid = dmi.read_dmi_data('system-uuid')
if os.path.exists(prev_iid_path):
previous = util.load_file(prev_iid_path).strip()
if is_byte_swapped(previous, iid):
@@ -592,10 +700,293 @@ class DataSourceAzure(sources.DataSource):
LOG.debug("negotiating already done for %s",
self.get_instance_id())
+ @azure_ds_telemetry_reporter
+ def _wait_for_nic_detach(self, nl_sock):
+ """Use the netlink socket provided to wait for nic detach event.
+ NOTE: The function doesn't close the socket. The caller owns closing
+ the socket and disposing it safely.
+ """
+ try:
+ ifname = None
+
+ # Preprovisioned VM will only have one NIC, and it gets
+ # detached immediately after deployment.
+ with events.ReportEventStack(
+ name="wait-for-nic-detach",
+ description=("wait for nic detach"),
+ parent=azure_ds_reporter):
+ ifname = netlink.wait_for_nic_detach_event(nl_sock)
+ if ifname is None:
+ msg = ("Preprovisioned nic not detached as expected. "
+ "Proceeding without failing.")
+ report_diagnostic_event(msg, logger_func=LOG.warning)
+ else:
+ report_diagnostic_event("The preprovisioned nic %s is detached"
+ % ifname, logger_func=LOG.warning)
+ path = REPROVISION_NIC_DETACHED_MARKER_FILE
+ LOG.info("Creating a marker file for nic detached: %s", path)
+ util.write_file(path, "{pid}: {time}\n".format(
+ pid=os.getpid(), time=time()))
+ except AssertionError as error:
+ report_diagnostic_event(error, logger_func=LOG.error)
+ raise
+
+ @azure_ds_telemetry_reporter
+ def wait_for_link_up(self, ifname):
+ """In cases where the link state is still showing down after a nic is
+ hot-attached, we can attempt to bring it up by forcing the hv_netvsc
+ drivers to query the link state by unbinding and then binding the
+ device. This function attempts infinitely until the link is up,
+ because we cannot proceed further until we have a stable link."""
+
+ if self.distro.networking.try_set_link_up(ifname):
+ report_diagnostic_event("The link %s is already up." % ifname,
+ logger_func=LOG.info)
+ return
+
+ LOG.info("Attempting to bring %s up", ifname)
+
+ attempts = 0
+ while True:
+
+ LOG.info("Unbinding and binding the interface %s", ifname)
+ devicename = net.read_sys_net(ifname,
+ 'device/device_id').strip('{}')
+ util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/unbind',
+ devicename)
+ util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/bind',
+ devicename)
+
+ attempts = attempts + 1
+ if self.distro.networking.try_set_link_up(ifname):
+ msg = "The link %s is up after %s attempts" % (ifname,
+ attempts)
+ report_diagnostic_event(msg, logger_func=LOG.info)
+ return
+
+ sleep_duration = 1
+ msg = ("Link is not up after %d attempts with %d seconds sleep "
+ "between attempts." % (attempts, sleep_duration))
+
+ if attempts % 10 == 0:
+ report_diagnostic_event(msg, logger_func=LOG.info)
+ else:
+ LOG.info(msg)
+
+ sleep(sleep_duration)
+
+ @azure_ds_telemetry_reporter
+ def _create_report_ready_marker(self):
+ path = REPORTED_READY_MARKER_FILE
+ LOG.info(
+ "Creating a marker file to report ready: %s", path)
+ util.write_file(path, "{pid}: {time}\n".format(
+ pid=os.getpid(), time=time()))
+ report_diagnostic_event(
+ 'Successfully created reported ready marker file '
+ 'while in the preprovisioning pool.',
+ logger_func=LOG.debug)
+
+ @azure_ds_telemetry_reporter
+ def _report_ready_if_needed(self):
+ """Report ready to the platform if the marker file is not present,
+ and create the marker file.
+ """
+ have_not_reported_ready = (
+ not os.path.isfile(REPORTED_READY_MARKER_FILE))
+
+ if have_not_reported_ready:
+ report_diagnostic_event("Reporting ready before nic detach",
+ logger_func=LOG.info)
+ try:
+ with EphemeralDHCPv4WithReporting(azure_ds_reporter) as lease:
+ self._report_ready(lease=lease)
+ except Exception as e:
+ report_diagnostic_event("Exception reporting ready during "
+ "preprovisioning before nic detach: %s"
+ % e, logger_func=LOG.error)
+ raise
+ self._create_report_ready_marker()
+ else:
+ report_diagnostic_event("Already reported ready before nic detach."
+ " The marker file already exists: %s" %
+ REPORTED_READY_MARKER_FILE,
+ logger_func=LOG.error)
+
+ @azure_ds_telemetry_reporter
+ def _check_if_nic_is_primary(self, ifname):
+ """Check if a given interface is the primary nic or not. If it is the
+ primary nic, then we also get the expected total nic count from IMDS.
+ IMDS will process the request and send a response only for primary NIC.
+ """
+ is_primary = False
+ expected_nic_count = -1
+ imds_md = None
+
+ # For now, only a VM's primary NIC can contact IMDS and WireServer. If
+ # DHCP fails for a NIC, we have no mechanism to determine if the NIC is
+ # primary or secondary. In this case, the desired behavior is to fail
+ # VM provisioning if there is any DHCP failure when trying to determine
+ # the primary NIC.
+ try:
+ with events.ReportEventStack(
+ name="obtain-dhcp-lease",
+ description=("obtain dhcp lease for %s when attempting to "
+ "determine primary NIC during reprovision of "
+ "a pre-provisioned VM" % ifname),
+ parent=azure_ds_reporter):
+ dhcp_ctx = EphemeralDHCPv4(
+ iface=ifname,
+ dhcp_log_func=dhcp_log_cb)
+ dhcp_ctx.obtain_lease()
+ except Exception as e:
+ report_diagnostic_event("Giving up. Failed to obtain dhcp lease "
+ "for %s when attempting to determine "
+ "primary NIC during reprovision due to %s"
+ % (ifname, e), logger_func=LOG.error)
+ raise
+
+ # Primary nic detection will be optimized in the future. The fact that
+ # primary nic is being attached first helps here. Otherwise each nic
+ # could add several seconds of delay.
+ try:
+ imds_md = get_metadata_from_imds(
+ ifname,
+ 5,
+ metadata_type.network)
+ except Exception as e:
+ LOG.warning(
+ "Failed to get network metadata using nic %s. Attempt to "
+ "contact IMDS failed with error %s. Assuming this is not the "
+ "primary nic.", ifname, e)
+ finally:
+ # If we are not the primary nic, then clean the dhcp context.
+ if imds_md is None:
+ dhcp_ctx.clean_network()
+
+ if imds_md is not None:
+ # Only primary NIC will get a response from IMDS.
+ LOG.info("%s is the primary nic", ifname)
+ is_primary = True
+
+ # If primary, set ephemeral dhcp ctx so we can report ready
+ self._ephemeral_dhcp_ctx = dhcp_ctx
+
+ # Set the expected nic count based on the response received.
+ expected_nic_count = len(
+ imds_md['interface'])
+ report_diagnostic_event("Expected nic count: %d" %
+ expected_nic_count, logger_func=LOG.info)
+
+ return is_primary, expected_nic_count
+
+ @azure_ds_telemetry_reporter
+ def _wait_for_hot_attached_nics(self, nl_sock):
+ """Wait until all the expected nics for the vm are hot-attached.
+ The expected nic count is obtained by requesting the network metadata
+ from IMDS.
+ """
+ LOG.info("Waiting for nics to be hot-attached")
+ try:
+ # Wait for nics to be attached one at a time, until we know for
+ # sure that all nics have been attached.
+ nics_found = []
+ primary_nic_found = False
+ expected_nic_count = -1
+
+ # Wait for netlink nic attach events. After the first nic is
+ # attached, we are already in the customer vm deployment path and
+            # so everything from then on should happen fast and avoid
+ # unnecessary delays wherever possible.
+ while True:
+ ifname = None
+ with events.ReportEventStack(
+ name="wait-for-nic-attach",
+ description=("wait for nic attach after %d nics have "
+ "been attached" % len(nics_found)),
+ parent=azure_ds_reporter):
+ ifname = netlink.wait_for_nic_attach_event(nl_sock,
+ nics_found)
+
+                # wait_for_nic_attach_event guarantees that ifname is not None
+ nics_found.append(ifname)
+ report_diagnostic_event("Detected nic %s attached." % ifname,
+ logger_func=LOG.info)
+
+ # Attempt to bring the interface's operating state to
+ # UP in case it is not already.
+ self.wait_for_link_up(ifname)
+
+ # If primary nic is not found, check if this is it. The
+ # platform will attach the primary nic first so we
+ # won't be in primary_nic_found = false state for long.
+ if not primary_nic_found:
+ LOG.info("Checking if %s is the primary nic",
+ ifname)
+ (primary_nic_found, expected_nic_count) = (
+ self._check_if_nic_is_primary(ifname))
+
+ # Exit criteria: check if we've discovered all nics
+ if (expected_nic_count != -1
+ and len(nics_found) >= expected_nic_count):
+ LOG.info("Found all the nics for this VM.")
+ break
+
+ except AssertionError as error:
+ report_diagnostic_event(error, logger_func=LOG.error)
+
+ @azure_ds_telemetry_reporter
+ def _wait_for_all_nics_ready(self):
+ """Wait for nic(s) to be hot-attached. There may be multiple nics
+ depending on the customer request.
+ But only primary nic would be able to communicate with wireserver
+ and IMDS. So we detect and save the primary nic to be used later.
+ """
+
+ nl_sock = None
+ try:
+ nl_sock = netlink.create_bound_netlink_socket()
+
+ report_ready_marker_present = bool(
+ os.path.isfile(REPORTED_READY_MARKER_FILE))
+
+ # Report ready if the marker file is not already present.
+ # The nic of the preprovisioned vm gets hot-detached as soon as
+ # we report ready. So no need to save the dhcp context.
+ self._report_ready_if_needed()
+
+ has_nic_been_detached = bool(
+ os.path.isfile(REPROVISION_NIC_DETACHED_MARKER_FILE))
+
+ if not has_nic_been_detached:
+ LOG.info("NIC has not been detached yet.")
+ self._wait_for_nic_detach(nl_sock)
+
+ # If we know that the preprovisioned nic has been detached, and we
+ # still have a fallback nic, then it means the VM must have
+ # rebooted as part of customer assignment, and all the nics have
+ # already been attached by the Azure platform. So there is no need
+ # to wait for nics to be hot-attached.
+ if not self.fallback_interface:
+ self._wait_for_hot_attached_nics(nl_sock)
+ else:
+ report_diagnostic_event("Skipping waiting for nic attach "
+ "because we already have a fallback "
+ "interface. Report Ready marker "
+ "present before detaching nics: %s" %
+ report_ready_marker_present,
+ logger_func=LOG.info)
+ except netlink.NetlinkCreateSocketError as e:
+ report_diagnostic_event(e, logger_func=LOG.warning)
+ raise
+ finally:
+ if nl_sock:
+ nl_sock.close()
+
def _poll_imds(self):
"""Poll IMDS for the new provisioning data until we get a valid
response. Then return the returned JSON object."""
- url = IMDS_URL + "reprovisiondata?api-version=2017-04-02"
+ url = metadata_type.reprovisiondata.value
headers = {"Metadata": "true"}
nl_sock = None
report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE))
@@ -611,16 +1002,14 @@ class DataSourceAzure(sources.DataSource):
if self.imds_poll_counter == self.imds_logging_threshold:
# Reducing the logging frequency as we are polling IMDS
self.imds_logging_threshold *= 2
- LOG.debug("Call to IMDS with arguments %s failed "
- "with status code %s after %s retries",
- msg, exception.code, self.imds_poll_counter)
LOG.debug("Backing off logging threshold for the same "
"exception to %d",
self.imds_logging_threshold)
report_diagnostic_event("poll IMDS with %s failed. "
"Exception: %s and code: %s" %
(msg, exception.cause,
- exception.code))
+ exception.code),
+ logger_func=LOG.debug)
self.imds_poll_counter += 1
return True
else:
@@ -629,24 +1018,41 @@ class DataSourceAzure(sources.DataSource):
report_diagnostic_event("poll IMDS with %s failed. "
"Exception: %s and code: %s" %
(msg, exception.cause,
- exception.code))
+ exception.code),
+ logger_func=LOG.warning)
return False
- LOG.debug("poll IMDS failed with an unexpected exception: %s",
- exception)
- return False
+ report_diagnostic_event(
+ "poll IMDS failed with an "
+ "unexpected exception: %s" % exception,
+ logger_func=LOG.warning)
+ return False
+
+ # When the interface is hot-attached, we would have already
+ # done dhcp and set the dhcp context. In that case, skip
+ # the attempt to do dhcp.
+ is_ephemeral_ctx_present = self._ephemeral_dhcp_ctx is not None
+ msg = ("Unexpected error. Dhcp context is not expected to be already "
+ "set when we need to wait for vnet switch")
+ if is_ephemeral_ctx_present and report_ready:
+ report_diagnostic_event(msg, logger_func=LOG.error)
+ raise RuntimeError(msg)
- LOG.debug("Wait for vnetswitch to happen")
while True:
try:
- # Save our EphemeralDHCPv4 context to avoid repeated dhcp
- with events.ReportEventStack(
- name="obtain-dhcp-lease",
- description="obtain dhcp lease",
- parent=azure_ds_reporter):
- self._ephemeral_dhcp_ctx = EphemeralDHCPv4(
- dhcp_log_func=dhcp_log_cb)
- lease = self._ephemeral_dhcp_ctx.obtain_lease()
+ # Since is_ephemeral_ctx_present is set only once, this ensures
+ # that with regular reprovisioning, dhcp is always done every
+ # time the loop runs.
+ if not is_ephemeral_ctx_present:
+ # Save our EphemeralDHCPv4 context to avoid repeated dhcp
+ # later when we report ready
+ with events.ReportEventStack(
+ name="obtain-dhcp-lease",
+ description="obtain dhcp lease",
+ parent=azure_ds_reporter):
+ self._ephemeral_dhcp_ctx = EphemeralDHCPv4(
+ dhcp_log_func=dhcp_log_cb)
+ lease = self._ephemeral_dhcp_ctx.obtain_lease()
if vnet_switched:
dhcp_attempts += 1
@@ -654,19 +1060,24 @@ class DataSourceAzure(sources.DataSource):
try:
nl_sock = netlink.create_bound_netlink_socket()
except netlink.NetlinkCreateSocketError as e:
- report_diagnostic_event(e)
- LOG.warning(e)
+ report_diagnostic_event(
+ 'Failed to create bound netlink socket: %s' % e,
+ logger_func=LOG.warning)
self._ephemeral_dhcp_ctx.clean_network()
break
- path = REPORTED_READY_MARKER_FILE
- LOG.info(
- "Creating a marker file to report ready: %s", path)
- util.write_file(path, "{pid}: {time}\n".format(
- pid=os.getpid(), time=time()))
- self._report_ready(lease=lease)
+ report_ready_succeeded = self._report_ready(lease=lease)
+ if not report_ready_succeeded:
+ msg = ('Failed reporting ready while in '
+ 'the preprovisioning pool.')
+ report_diagnostic_event(msg, logger_func=LOG.error)
+ self._ephemeral_dhcp_ctx.clean_network()
+ raise sources.InvalidMetaDataException(msg)
+
+ self._create_report_ready_marker()
report_ready = False
+ LOG.debug("Wait for vnetswitch to happen")
with events.ReportEventStack(
name="wait-for-media-disconnect-connect",
description="wait for vnet switch",
@@ -674,9 +1085,10 @@ class DataSourceAzure(sources.DataSource):
try:
netlink.wait_for_media_disconnect_connect(
nl_sock, lease['interface'])
- except AssertionError as error:
- report_diagnostic_event(error)
- LOG.error(error)
+ except AssertionError as e:
+ report_diagnostic_event(
+ 'Error while waiting for vnet switch: %s' % e,
+ logger_func=LOG.error)
break
vnet_switched = True
@@ -702,21 +1114,113 @@ class DataSourceAzure(sources.DataSource):
if vnet_switched:
report_diagnostic_event("attempted dhcp %d times after reuse" %
- dhcp_attempts)
+ dhcp_attempts,
+ logger_func=LOG.debug)
report_diagnostic_event("polled imds %d times after reuse" %
- self.imds_poll_counter)
+ self.imds_poll_counter,
+ logger_func=LOG.debug)
return return_val
@azure_ds_telemetry_reporter
- def _report_ready(self, lease):
- """Tells the fabric provisioning has completed """
+ def _report_failure(self, description=None) -> bool:
+ """Tells the Azure fabric that provisioning has failed.
+
+ @param description: A description of the error encountered.
+ @return: The success status of sending the failure signal.
+ """
+ unknown_245_key = 'unknown-245'
+
+ try:
+ if (self.distro.networking.is_up(self.fallback_interface) and
+ getattr(self, '_ephemeral_dhcp_ctx', None) and
+ getattr(self._ephemeral_dhcp_ctx, 'lease', None) and
+ unknown_245_key in self._ephemeral_dhcp_ctx.lease):
+ report_diagnostic_event(
+ 'Using cached ephemeral dhcp context '
+ 'to report failure to Azure', logger_func=LOG.debug)
+ report_failure_to_fabric(
+ dhcp_opts=self._ephemeral_dhcp_ctx.lease[unknown_245_key],
+ description=description)
+ self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral
+ return True
+ except Exception as e:
+ report_diagnostic_event(
+ 'Failed to report failure using '
+ 'cached ephemeral dhcp context: %s' % e,
+ logger_func=LOG.error)
+
+ try:
+ report_diagnostic_event(
+ 'Using new ephemeral dhcp to report failure to Azure',
+ logger_func=LOG.debug)
+ with EphemeralDHCPv4WithReporting(azure_ds_reporter) as lease:
+ report_failure_to_fabric(
+ dhcp_opts=lease[unknown_245_key],
+ description=description)
+ return True
+ except Exception as e:
+ report_diagnostic_event(
+ 'Failed to report failure using new ephemeral dhcp: %s' % e,
+ logger_func=LOG.debug)
+
+ try:
+ report_diagnostic_event(
+ 'Using fallback lease to report failure to Azure')
+ report_failure_to_fabric(
+ fallback_lease_file=self.dhclient_lease_file,
+ description=description)
+ return True
+ except Exception as e:
+ report_diagnostic_event(
+ 'Failed to report failure using fallback lease: %s' % e,
+ logger_func=LOG.debug)
+
+ return False
+
+ def _report_ready(self, lease: dict) -> bool:
+ """Tells the fabric provisioning has completed.
+
+ @param lease: dhcp lease to use for sending the ready signal.
+ @return: The success status of sending the ready signal.
+ """
try:
get_metadata_from_fabric(None, lease['unknown-245'])
- except Exception:
- LOG.warning(
- "Error communicating with Azure fabric; You may experience."
- "connectivity issues.", exc_info=True)
+ return True
+ except Exception as e:
+ report_diagnostic_event(
+ "Error communicating with Azure fabric; You may experience "
+ "connectivity issues: %s" % e, logger_func=LOG.warning)
+ return False
+
+ def _should_reprovision_after_nic_attach(self, candidate_metadata) -> bool:
+ """Whether or not we should wait for nic attach and then poll
+ IMDS for reprovisioning data. Also sets a marker file to poll IMDS.
+
+ The marker file is used for the following scenario: the VM boots into
+ wait for nic attach, which we expect to be proceeding infinitely until
+ the nic is attached. If for whatever reason the platform moves us to a
+ new host (for instance a hardware issue), we need to keep waiting.
+ However, since the VM reports ready to the Fabric, we will not attach
+ the ISO, thus cloud-init needs to have a way of knowing that it should
+ jump back into the waiting mode in order to retrieve the ovf_env.
+
+ @param candidate_metadata: Metadata obtained from reading ovf-env.
+ @return: Whether to reprovision after waiting for nics to be attached.
+ """
+ if not candidate_metadata:
+ return False
+ (_md, _userdata_raw, cfg, _files) = candidate_metadata
+ path = REPROVISION_NIC_ATTACH_MARKER_FILE
+ if (cfg.get('PreprovisionedVMType', None) == "Savable" or
+ os.path.isfile(path)):
+ if not os.path.isfile(path):
+ LOG.info("Creating a marker file to wait for nic attach: %s",
+ path)
+ util.write_file(path, "{pid}: {time}\n".format(
+ pid=os.getpid(), time=time()))
+ return True
+ return False
def _should_reprovision(self, ret):
"""Whether or not we should poll IMDS for reprovisioning data.
@@ -734,6 +1238,7 @@ class DataSourceAzure(sources.DataSource):
(_md, _userdata_raw, cfg, _files) = ret
path = REPROVISION_MARKER_FILE
if (cfg.get('PreprovisionedVm') is True or
+ cfg.get('PreprovisionedVMType', None) == 'Running' or
os.path.isfile(path)):
if not os.path.isfile(path):
LOG.info("Creating a marker file to poll imds: %s",
@@ -764,7 +1269,22 @@ class DataSourceAzure(sources.DataSource):
if self.ds_cfg['agent_command'] == AGENT_START_BUILTIN:
self.bounce_network_with_azure_hostname()
- pubkey_info = self.cfg.get('_pubkeys', None)
+ pubkey_info = None
+ try:
+ public_keys = self.metadata['imds']['compute']['publicKeys']
+ LOG.debug(
+ 'Successfully retrieved %s key(s) from IMDS',
+ len(public_keys)
+ if public_keys is not None
+ else 0
+ )
+ except KeyError:
+ LOG.debug(
+ 'Unable to retrieve SSH keys from IMDS during '
+ 'negotiation, falling back to OVF'
+ )
+ pubkey_info = self.cfg.get('_pubkeys', None)
+
metadata_func = partial(get_metadata_from_fabric,
fallback_lease_file=self.
dhclient_lease_file,
@@ -779,14 +1299,13 @@ class DataSourceAzure(sources.DataSource):
except Exception as e:
report_diagnostic_event(
"Error communicating with Azure fabric; You may experience "
- "connectivity issues: %s" % e)
- LOG.warning(
- "Error communicating with Azure fabric; You may experience "
- "connectivity issues.", exc_info=True)
+ "connectivity issues: %s" % e, logger_func=LOG.warning)
return False
util.del_file(REPORTED_READY_MARKER_FILE)
util.del_file(REPROVISION_MARKER_FILE)
+ util.del_file(REPROVISION_NIC_ATTACH_MARKER_FILE)
+ util.del_file(REPROVISION_NIC_DETACHED_MARKER_FILE)
return fabric_data
@azure_ds_telemetry_reporter
@@ -947,9 +1466,10 @@ def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,
log_pre="Azure ephemeral disk: ")
if missing:
- LOG.warning("ephemeral device '%s' did"
- " not appear after %d seconds.",
- devpath, maxwait)
+ report_diagnostic_event(
+ "ephemeral device '%s' did not appear after %d seconds." %
+ (devpath, maxwait),
+ logger_func=LOG.warning)
return
result = False
@@ -1034,7 +1554,9 @@ def pubkeys_from_crt_files(flist):
errors.append(fname)
if errors:
- LOG.warning("failed to convert the crt files to pubkey: %s", errors)
+ report_diagnostic_event(
+ "failed to convert the crt files to pubkey: %s" % errors,
+ logger_func=LOG.warning)
return pubkeys
@@ -1146,7 +1668,7 @@ def read_azure_ovf(contents):
dom = minidom.parseString(contents)
except Exception as e:
error_str = "Invalid ovf-env.xml: %s" % e
- report_diagnostic_event(error_str)
+ report_diagnostic_event(error_str, logger_func=LOG.warning)
raise BrokenAzureDataSource(error_str) from e
results = find_child(dom.documentElement,
@@ -1231,7 +1753,7 @@ def read_azure_ovf(contents):
if password:
defuser['lock_passwd'] = False
if DEF_PASSWD_REDACTION != password:
- defuser['passwd'] = encrypt_pass(password)
+ defuser['passwd'] = cfg['password'] = encrypt_pass(password)
if defuser:
cfg['system_info'] = {'default_user': defuser}
@@ -1239,34 +1761,109 @@ def read_azure_ovf(contents):
if 'ssh_pwauth' not in cfg and password:
cfg['ssh_pwauth'] = True
- cfg['PreprovisionedVm'] = _extract_preprovisioned_vm_setting(dom)
+ preprovisioning_cfg = _get_preprovisioning_cfgs(dom)
+ cfg = util.mergemanydict([cfg, preprovisioning_cfg])
return (md, ud, cfg)
@azure_ds_telemetry_reporter
-def _extract_preprovisioned_vm_setting(dom):
- """Read the preprovision flag from the ovf. It should not
- exist unless true."""
+def _get_preprovisioning_cfgs(dom):
+ """Read the preprovisioning related flags from ovf and populates a dict
+ with the info.
+
+ Two flags are in use today: PreprovisionedVm bool and
+ PreprovisionedVMType enum. In the long term, the PreprovisionedVm bool
+ will be deprecated in favor of PreprovisionedVMType string/enum.
+
+ Only these combinations of values are possible today:
+ - PreprovisionedVm=True and PreprovisionedVMType=Running
+ - PreprovisionedVm=False and PreprovisionedVMType=Savable
+ - PreprovisionedVm is missing and PreprovisionedVMType=Running/Savable
+ - PreprovisionedVm=False and PreprovisionedVMType is missing
+
+ More specifically, this will never happen:
+ - PreprovisionedVm=True and PreprovisionedVMType=Savable
+ """
+ cfg = {
+ "PreprovisionedVm": False,
+ "PreprovisionedVMType": None
+ }
+
platform_settings_section = find_child(
dom.documentElement,
lambda n: n.localName == "PlatformSettingsSection")
if not platform_settings_section or len(platform_settings_section) == 0:
LOG.debug("PlatformSettingsSection not found")
- return False
+ return cfg
platform_settings = find_child(
platform_settings_section[0],
lambda n: n.localName == "PlatformSettings")
if not platform_settings or len(platform_settings) == 0:
LOG.debug("PlatformSettings not found")
- return False
- preprovisionedVm = find_child(
+ return cfg
+
+ # Read the PreprovisionedVm bool flag. This should be deprecated when the
+ # platform has removed PreprovisionedVm and only surfaces
+ # PreprovisionedVMType.
+ cfg["PreprovisionedVm"] = _get_preprovisionedvm_cfg_value(
+ platform_settings)
+
+ cfg["PreprovisionedVMType"] = _get_preprovisionedvmtype_cfg_value(
+ platform_settings)
+ return cfg
+
+
+@azure_ds_telemetry_reporter
+def _get_preprovisionedvm_cfg_value(platform_settings):
+ preprovisionedVm = False
+
+ # Read the PreprovisionedVm bool flag. This should be deprecated when the
+ # platform has removed PreprovisionedVm and only surfaces
+ # PreprovisionedVMType.
+ preprovisionedVmVal = find_child(
platform_settings[0],
lambda n: n.localName == "PreprovisionedVm")
- if not preprovisionedVm or len(preprovisionedVm) == 0:
+ if not preprovisionedVmVal or len(preprovisionedVmVal) == 0:
LOG.debug("PreprovisionedVm not found")
- return False
- return util.translate_bool(preprovisionedVm[0].firstChild.nodeValue)
+ return preprovisionedVm
+ preprovisionedVm = util.translate_bool(
+ preprovisionedVmVal[0].firstChild.nodeValue)
+
+ report_diagnostic_event(
+ "PreprovisionedVm: %s" % preprovisionedVm, logger_func=LOG.info)
+
+ return preprovisionedVm
+
+
+@azure_ds_telemetry_reporter
+def _get_preprovisionedvmtype_cfg_value(platform_settings):
+ preprovisionedVMType = None
+
+ # Read the PreprovisionedVMType value from the ovf. It can be
+ # 'Running' or 'Savable' or not exist. This enum value is intended to
+ # replace PreprovisionedVm bool flag in the long term.
+ # A Running VM is the same as preprovisioned VMs of today. This is
+ # equivalent to having PreprovisionedVm=True.
+ # A Savable VM is one whose nic is hot-detached immediately after it
+ # reports ready the first time to free up the network resources.
+ # Once assigned to customer, the customer-requested nics are
+ # hot-attached to it and reprovision happens like today.
+ preprovisionedVMTypeVal = find_child(
+ platform_settings[0],
+ lambda n: n.localName == "PreprovisionedVMType")
+ if (not preprovisionedVMTypeVal or len(preprovisionedVMTypeVal) == 0 or
+ preprovisionedVMTypeVal[0].firstChild is None):
+ LOG.debug("PreprovisionedVMType not found")
+ return preprovisionedVMType
+
+ preprovisionedVMType = preprovisionedVMTypeVal[0].firstChild.nodeValue
+
+ report_diagnostic_event(
+ "PreprovisionedVMType: %s" % preprovisionedVMType,
+ logger_func=LOG.info)
+
+ return preprovisionedVMType
def encrypt_pass(password, salt_id="$6$"):
@@ -1338,81 +1935,100 @@ def load_azure_ds_dir(source_dir):
return (md, ud, cfg, {'ovf-env.xml': contents})
-def parse_network_config(imds_metadata):
+@azure_ds_telemetry_reporter
+def parse_network_config(imds_metadata) -> dict:
"""Convert imds_metadata dictionary to network v2 configuration.
-
Parses network configuration from imds metadata if present or generate
fallback network config excluding mlx4_core devices.
@param: imds_metadata: Dict of content read from IMDS network service.
@return: Dictionary containing network version 2 standard configuration.
"""
- with events.ReportEventStack(
- name="parse_network_config",
- description="",
- parent=azure_ds_reporter
- ) as evt:
- if imds_metadata != sources.UNSET and imds_metadata:
- netconfig = {'version': 2, 'ethernets': {}}
- LOG.debug('Azure: generating network configuration from IMDS')
- network_metadata = imds_metadata['network']
- for idx, intf in enumerate(network_metadata['interface']):
- # First IPv4 and/or IPv6 address will be obtained via DHCP.
- # Any additional IPs of each type will be set as static
- # addresses.
- nicname = 'eth{idx}'.format(idx=idx)
- dhcp_override = {'route-metric': (idx + 1) * 100}
- dev_config = {'dhcp4': True, 'dhcp4-overrides': dhcp_override,
- 'dhcp6': False}
- for addr_type in ('ipv4', 'ipv6'):
- addresses = intf.get(addr_type, {}).get('ipAddress', [])
- if addr_type == 'ipv4':
- default_prefix = '24'
- else:
- default_prefix = '128'
- if addresses:
- dev_config['dhcp6'] = True
- # non-primary interfaces should have a higher
- # route-metric (cost) so default routes prefer
- # primary nic due to lower route-metric value
- dev_config['dhcp6-overrides'] = dhcp_override
- for addr in addresses[1:]:
- # Append static address config for ip > 1
- netPrefix = intf[addr_type]['subnet'][0].get(
- 'prefix', default_prefix)
- privateIp = addr['privateIpAddress']
- if not dev_config.get('addresses'):
- dev_config['addresses'] = []
- dev_config['addresses'].append(
- '{ip}/{prefix}'.format(
- ip=privateIp, prefix=netPrefix))
- if dev_config:
- mac = ':'.join(re.findall(r'..', intf['macAddress']))
- dev_config.update({
- 'match': {'macaddress': mac.lower()},
- 'set-name': nicname
- })
- # With netvsc, we can get two interfaces that
- # share the same MAC, so we need to make sure
- # our match condition also contains the driver
- driver = device_driver(nicname)
- if driver and driver == 'hv_netvsc':
- dev_config['match']['driver'] = driver
- netconfig['ethernets'][nicname] = dev_config
- evt.description = "network config from imds"
- else:
- blacklist = ['mlx4_core']
- LOG.debug('Azure: generating fallback configuration')
- # generate a network config, blacklist picking mlx4_core devs
- netconfig = net.generate_fallback_config(
- blacklist_drivers=blacklist, config_driver=True)
- evt.description = "network config from fallback"
- return netconfig
+ if imds_metadata != sources.UNSET and imds_metadata:
+ try:
+ return _generate_network_config_from_imds_metadata(imds_metadata)
+ except Exception as e:
+ LOG.error(
+ 'Failed generating network config '
+ 'from IMDS network metadata: %s', str(e))
+ try:
+ return _generate_network_config_from_fallback_config()
+ except Exception as e:
+ LOG.error('Failed generating fallback network config: %s', str(e))
+ return {}
+
+
+@azure_ds_telemetry_reporter
+def _generate_network_config_from_imds_metadata(imds_metadata) -> dict:
+ """Convert imds_metadata dictionary to network v2 configuration.
+ Parses network configuration from imds metadata.
+
+ @param: imds_metadata: Dict of content read from IMDS network service.
+ @return: Dictionary containing network version 2 standard configuration.
+ """
+ netconfig = {'version': 2, 'ethernets': {}}
+ network_metadata = imds_metadata['network']
+ for idx, intf in enumerate(network_metadata['interface']):
+ # First IPv4 and/or IPv6 address will be obtained via DHCP.
+ # Any additional IPs of each type will be set as static
+ # addresses.
+ nicname = 'eth{idx}'.format(idx=idx)
+ dhcp_override = {'route-metric': (idx + 1) * 100}
+ dev_config = {'dhcp4': True, 'dhcp4-overrides': dhcp_override,
+ 'dhcp6': False}
+ for addr_type in ('ipv4', 'ipv6'):
+ addresses = intf.get(addr_type, {}).get('ipAddress', [])
+ if addr_type == 'ipv4':
+ default_prefix = '24'
+ else:
+ default_prefix = '128'
+ if addresses:
+ dev_config['dhcp6'] = True
+ # non-primary interfaces should have a higher
+ # route-metric (cost) so default routes prefer
+ # primary nic due to lower route-metric value
+ dev_config['dhcp6-overrides'] = dhcp_override
+ for addr in addresses[1:]:
+ # Append static address config for ip > 1
+ netPrefix = intf[addr_type]['subnet'][0].get(
+ 'prefix', default_prefix)
+ privateIp = addr['privateIpAddress']
+ if not dev_config.get('addresses'):
+ dev_config['addresses'] = []
+ dev_config['addresses'].append(
+ '{ip}/{prefix}'.format(
+ ip=privateIp, prefix=netPrefix))
+ if dev_config:
+ mac = ':'.join(re.findall(r'..', intf['macAddress']))
+ dev_config.update({
+ 'match': {'macaddress': mac.lower()},
+ 'set-name': nicname
+ })
+ # With netvsc, we can get two interfaces that
+ # share the same MAC, so we need to make sure
+ # our match condition also contains the driver
+ driver = device_driver(nicname)
+ if driver and driver == 'hv_netvsc':
+ dev_config['match']['driver'] = driver
+ netconfig['ethernets'][nicname] = dev_config
+ return netconfig
+
+
+@azure_ds_telemetry_reporter
+def _generate_network_config_from_fallback_config() -> dict:
+ """Generate fallback network config excluding blacklisted devices.
+
+ @return: Dictionary containing network version 2 standard configuration.
+ """
+ return net.generate_fallback_config(
+ blacklist_drivers=BLACKLIST_DRIVERS, config_driver=True)
@azure_ds_telemetry_reporter
-def get_metadata_from_imds(fallback_nic, retries):
- """Query Azure's network metadata service, returning a dictionary.
+def get_metadata_from_imds(fallback_nic,
+ retries,
+ md_type=metadata_type.compute):
+ """Query Azure's instance metadata service, returning a dictionary.
If network is not up, setup ephemeral dhcp on fallback_nic to talk to the
IMDS. For more info on IMDS:
@@ -1427,7 +2043,7 @@ def get_metadata_from_imds(fallback_nic, retries):
"""
kwargs = {'logfunc': LOG.debug,
'msg': 'Crawl of Azure Instance Metadata Service (IMDS)',
- 'func': _get_metadata_from_imds, 'args': (retries,)}
+ 'func': _get_metadata_from_imds, 'args': (retries, md_type,)}
if net.is_up(fallback_nic):
return util.log_time(**kwargs)
else:
@@ -1436,23 +2052,26 @@ def get_metadata_from_imds(fallback_nic, retries):
azure_ds_reporter, fallback_nic):
return util.log_time(**kwargs)
except Exception as e:
- report_diagnostic_event("exception while getting metadata: %s" % e)
+ report_diagnostic_event(
+ "exception while getting metadata: %s" % e,
+ logger_func=LOG.warning)
raise
@azure_ds_telemetry_reporter
-def _get_metadata_from_imds(retries):
+def _get_metadata_from_imds(retries, md_type=metadata_type.compute):
- url = IMDS_URL + "instance?api-version=2017-12-01"
+ url = md_type.value
headers = {"Metadata": "true"}
try:
response = readurl(
url, timeout=IMDS_TIMEOUT_IN_SECONDS, headers=headers,
retries=retries, exception_cb=retry_on_url_exc)
except Exception as e:
- msg = 'Ignoring IMDS instance metadata: %s' % e
- report_diagnostic_event(msg)
- LOG.debug(msg)
+ report_diagnostic_event(
+ 'Ignoring IMDS instance metadata. '
+ 'Get metadata from IMDS failed: %s' % e,
+ logger_func=LOG.warning)
return {}
try:
from json.decoder import JSONDecodeError
@@ -1463,9 +2082,10 @@ def _get_metadata_from_imds(retries):
try:
return util.load_json(str(response))
except json_decode_error as e:
- report_diagnostic_event('non-json imds response' % e)
- LOG.warning(
- 'Ignoring non-json IMDS instance metadata: %s', str(response))
+ report_diagnostic_event(
+ 'Ignoring non-json IMDS instance metadata response: %s. '
+ 'Loading non-json IMDS response failed: %s' % (str(response), e),
+ logger_func=LOG.warning)
return {}
@@ -1513,13 +2133,12 @@ def _is_platform_viable(seed_dir):
description="found azure asset tag",
parent=azure_ds_reporter
) as evt:
- asset_tag = util.read_dmi_data('chassis-asset-tag')
+ asset_tag = dmi.read_dmi_data('chassis-asset-tag')
if asset_tag == AZURE_CHASSIS_ASSET_TAG:
return True
msg = "Non-Azure DMI asset tag '%s' discovered." % asset_tag
- LOG.debug(msg)
evt.description = msg
- report_diagnostic_event(msg)
+ report_diagnostic_event(msg, logger_func=LOG.debug)
if os.path.exists(os.path.join(seed_dir, 'ovf-env.xml')):
return True
return False
diff --git a/cloudinit/sources/DataSourceBigstep.py b/cloudinit/sources/DataSourceBigstep.py
index 52fff20a..63435279 100644
--- a/cloudinit/sources/DataSourceBigstep.py
+++ b/cloudinit/sources/DataSourceBigstep.py
@@ -7,13 +7,10 @@
import errno
import json
-from cloudinit import log as logging
from cloudinit import sources
from cloudinit import url_helper
from cloudinit import util
-LOG = logging.getLogger(__name__)
-
class DataSourceBigstep(sources.DataSource):
diff --git a/cloudinit/sources/DataSourceCloudSigma.py b/cloudinit/sources/DataSourceCloudSigma.py
index df88f677..f63baf74 100644
--- a/cloudinit/sources/DataSourceCloudSigma.py
+++ b/cloudinit/sources/DataSourceCloudSigma.py
@@ -9,9 +9,9 @@ import re
from cloudinit.cs_utils import Cepko, SERIAL_PORT
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit import sources
-from cloudinit import util
LOG = logging.getLogger(__name__)
@@ -38,7 +38,7 @@ class DataSourceCloudSigma(sources.DataSource):
"""
LOG.debug("determining hypervisor product name via dmi data")
- sys_product_name = util.read_dmi_data("system-product-name")
+ sys_product_name = dmi.read_dmi_data("system-product-name")
if not sys_product_name:
LOG.debug("system-product-name not available in dmi data")
return False
diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py
index 1d09c12a..1930a509 100644
--- a/cloudinit/sources/DataSourceEc2.py
+++ b/cloudinit/sources/DataSourceEc2.py
@@ -11,6 +11,7 @@
import os
import time
+from cloudinit import dmi
from cloudinit import ec2_utils as ec2
from cloudinit import log as logging
from cloudinit import net
@@ -699,26 +700,26 @@ def _collect_platform_data():
uuid = util.load_file("/sys/hypervisor/uuid").strip()
data['uuid_source'] = 'hypervisor'
except Exception:
- uuid = util.read_dmi_data('system-uuid')
+ uuid = dmi.read_dmi_data('system-uuid')
data['uuid_source'] = 'dmi'
if uuid is None:
uuid = ''
data['uuid'] = uuid.lower()
- serial = util.read_dmi_data('system-serial-number')
+ serial = dmi.read_dmi_data('system-serial-number')
if serial is None:
serial = ''
data['serial'] = serial.lower()
- asset_tag = util.read_dmi_data('chassis-asset-tag')
+ asset_tag = dmi.read_dmi_data('chassis-asset-tag')
if asset_tag is None:
asset_tag = ''
data['asset_tag'] = asset_tag.lower()
- vendor = util.read_dmi_data('system-manufacturer')
+ vendor = dmi.read_dmi_data('system-manufacturer')
data['vendor'] = (vendor if vendor else '').lower()
return data
diff --git a/cloudinit/sources/DataSourceExoscale.py b/cloudinit/sources/DataSourceExoscale.py
index d59aefd1..adee6d79 100644
--- a/cloudinit/sources/DataSourceExoscale.py
+++ b/cloudinit/sources/DataSourceExoscale.py
@@ -3,6 +3,7 @@
#
# This file is part of cloud-init. See LICENSE file for license information.
+from cloudinit import dmi
from cloudinit import ec2_utils as ec2
from cloudinit import log as logging
from cloudinit import sources
@@ -135,7 +136,7 @@ class DataSourceExoscale(sources.DataSource):
return self.extra_config
def _is_platform_viable(self):
- return util.read_dmi_data('system-product-name').startswith(
+ return dmi.read_dmi_data('system-product-name').startswith(
EXOSCALE_DMI_NAME)
diff --git a/cloudinit/sources/DataSourceGCE.py b/cloudinit/sources/DataSourceGCE.py
index 0ec5f6ec..746caddb 100644
--- a/cloudinit/sources/DataSourceGCE.py
+++ b/cloudinit/sources/DataSourceGCE.py
@@ -7,6 +7,7 @@ import json
from base64 import b64decode
+from cloudinit import dmi
from cloudinit.distros import ug_util
from cloudinit import log as logging
from cloudinit import sources
@@ -248,12 +249,12 @@ def read_md(address=None, platform_check=True):
def platform_reports_gce():
- pname = util.read_dmi_data('system-product-name') or "N/A"
+ pname = dmi.read_dmi_data('system-product-name') or "N/A"
if pname == "Google Compute Engine":
return True
# system-product-name is not always guaranteed (LP: #1674861)
- serial = util.read_dmi_data('system-serial-number') or "N/A"
+ serial = dmi.read_dmi_data('system-serial-number') or "N/A"
if serial.startswith("GoogleCloud-"):
return True
diff --git a/cloudinit/sources/DataSourceHetzner.py b/cloudinit/sources/DataSourceHetzner.py
index a86035e0..c7c88dd7 100644
--- a/cloudinit/sources/DataSourceHetzner.py
+++ b/cloudinit/sources/DataSourceHetzner.py
@@ -3,9 +3,10 @@
#
# This file is part of cloud-init. See LICENSE file for license information.
#
-"""Hetzner Cloud API Documentation.
+"""Hetzner Cloud API Documentation
https://docs.hetzner.cloud/"""
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit import net as cloudnet
from cloudinit import sources
@@ -46,9 +47,12 @@ class DataSourceHetzner(sources.DataSource):
self._network_config = None
self.dsmode = sources.DSMODE_NETWORK
- def get_data(self):
- if not on_hetzner():
+ def _get_data(self):
+ (on_hetzner, serial) = get_hcloud_data()
+
+ if not on_hetzner:
return False
+
nic = cloudnet.find_fallback_nic()
with cloudnet.EphemeralIPv4Network(nic, "169.254.0.1", 16,
"169.254.255.255"):
@@ -78,8 +82,18 @@ class DataSourceHetzner(sources.DataSource):
self.metadata['public-keys'] = md.get('public-keys', None)
self.vendordata_raw = md.get("vendor_data", None)
+ # instance-id and serial from SMBIOS should be identical
+ if self.get_instance_id() != serial:
+ raise RuntimeError(
+ "SMBIOS serial does not match instance ID from metadata"
+ )
+
return True
+ def check_instance_id(self, sys_cfg):
+ return sources.instance_id_matches_system_uuid(
+ self.get_instance_id(), 'system-serial-number')
+
@property
def network_config(self):
"""Configure the networking. This needs to be done each boot, since
@@ -99,8 +113,18 @@ class DataSourceHetzner(sources.DataSource):
return self._network_config
-def on_hetzner():
- return util.read_dmi_data('system-manufacturer') == "Hetzner"
+def get_hcloud_data():
+ vendor_name = dmi.read_dmi_data('system-manufacturer')
+ if vendor_name != "Hetzner":
+ return (False, None)
+
+ serial = dmi.read_dmi_data("system-serial-number")
+ if serial:
+ LOG.debug("Running on Hetzner Cloud: serial=%s", serial)
+ else:
+ raise RuntimeError("Hetzner Cloud detected, but no serial found")
+
+ return (True, serial)
# Used to match classes to dependencies
diff --git a/cloudinit/sources/DataSourceNoCloud.py b/cloudinit/sources/DataSourceNoCloud.py
index e408d730..a126aad3 100644
--- a/cloudinit/sources/DataSourceNoCloud.py
+++ b/cloudinit/sources/DataSourceNoCloud.py
@@ -11,6 +11,7 @@
import errno
import os
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit.net import eni
from cloudinit import sources
@@ -61,7 +62,7 @@ class DataSourceNoCloud(sources.DataSource):
# Parse the system serial label from dmi. If not empty, try parsing
# like the commandline
md = {}
- serial = util.read_dmi_data('system-serial-number')
+ serial = dmi.read_dmi_data('system-serial-number')
if serial and load_cmdline_data(md, serial):
found.append("dmi")
mydata = _merge_new_seed(mydata, {'meta-data': md})
@@ -157,13 +158,14 @@ class DataSourceNoCloud(sources.DataSource):
# This could throw errors, but the user told us to do it
# so if errors are raised, let them raise
- (md_seed, ud) = util.read_seeded(seedfrom, timeout=None)
+ (md_seed, ud, vd) = util.read_seeded(seedfrom, timeout=None)
LOG.debug("Using seeded cache data from %s", seedfrom)
# Values in the command line override those from the seed
mydata['meta-data'] = util.mergemanydict([mydata['meta-data'],
md_seed])
mydata['user-data'] = ud
+ mydata['vendor-data'] = vd
found.append(seedfrom)
# Now that we have exhausted any other places merge in the defaults
diff --git a/cloudinit/sources/DataSourceNone.py b/cloudinit/sources/DataSourceNone.py
index e6250801..b7656ac5 100644
--- a/cloudinit/sources/DataSourceNone.py
+++ b/cloudinit/sources/DataSourceNone.py
@@ -4,11 +4,8 @@
#
# This file is part of cloud-init. See LICENSE file for license information.
-from cloudinit import log as logging
from cloudinit import sources
-LOG = logging.getLogger(__name__)
-
class DataSourceNone(sources.DataSource):
diff --git a/cloudinit/sources/DataSourceOVF.py b/cloudinit/sources/DataSourceOVF.py
index e53d2eb1..741c140a 100644
--- a/cloudinit/sources/DataSourceOVF.py
+++ b/cloudinit/sources/DataSourceOVF.py
@@ -14,6 +14,7 @@ import re
import time
from xml.dom import minidom
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit import sources
from cloudinit import subp
@@ -73,6 +74,7 @@ class DataSourceOVF(sources.DataSource):
found = []
md = {}
ud = ""
+ vd = ""
vmwareImcConfigFilePath = None
nicspath = None
@@ -82,7 +84,7 @@ class DataSourceOVF(sources.DataSource):
(seedfile, contents) = get_ovf_env(self.paths.seed_dir)
- system_type = util.read_dmi_data("system-product-name")
+ system_type = dmi.read_dmi_data("system-product-name")
if system_type is None:
LOG.debug("No system-product-name found")
@@ -304,7 +306,7 @@ class DataSourceOVF(sources.DataSource):
seedfrom, self)
return False
- (md_seed, ud) = util.read_seeded(seedfrom, timeout=None)
+ (md_seed, ud, vd) = util.read_seeded(seedfrom, timeout=None)
LOG.debug("Using seeded cache data from %s", seedfrom)
md = util.mergemanydict([md, md_seed])
@@ -316,11 +318,12 @@ class DataSourceOVF(sources.DataSource):
self.seed = ",".join(found)
self.metadata = md
self.userdata_raw = ud
+ self.vendordata_raw = vd
self.cfg = cfg
return True
def _get_subplatform(self):
- system_type = util.read_dmi_data("system-product-name").lower()
+ system_type = dmi.read_dmi_data("system-product-name").lower()
if system_type == 'vmware':
return 'vmware (%s)' % self.seed
return 'ovf (%s)' % self.seed
diff --git a/cloudinit/sources/DataSourceOpenNebula.py b/cloudinit/sources/DataSourceOpenNebula.py
index 45481938..730ec586 100644
--- a/cloudinit/sources/DataSourceOpenNebula.py
+++ b/cloudinit/sources/DataSourceOpenNebula.py
@@ -350,7 +350,8 @@ def parse_shell_config(content, keylist=None, bash=None, asuser=None,
# exclude vars in bash that change on their own or that we used
excluded = (
"EPOCHREALTIME", "EPOCHSECONDS", "RANDOM", "LINENO", "SECONDS", "_",
- "__v")
+ "SRANDOM", "__v",
+ )
preset = {}
ret = {}
target = None
diff --git a/cloudinit/sources/DataSourceOpenStack.py b/cloudinit/sources/DataSourceOpenStack.py
index d4b43f44..b3406c67 100644
--- a/cloudinit/sources/DataSourceOpenStack.py
+++ b/cloudinit/sources/DataSourceOpenStack.py
@@ -6,6 +6,7 @@
import time
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError
from cloudinit import sources
@@ -32,7 +33,8 @@ DMI_ASSET_TAG_OPENTELEKOM = 'OpenTelekomCloud'
# See github.com/sapcc/helm-charts/blob/master/openstack/nova/values.yaml
# -> compute.defaults.vmware.smbios_asset_tag for this value
DMI_ASSET_TAG_SAPCCLOUD = 'SAP CCloud VM'
-VALID_DMI_ASSET_TAGS = [DMI_ASSET_TAG_OPENTELEKOM, DMI_ASSET_TAG_SAPCCLOUD]
+VALID_DMI_ASSET_TAGS = VALID_DMI_PRODUCT_NAMES
+VALID_DMI_ASSET_TAGS += [DMI_ASSET_TAG_OPENTELEKOM, DMI_ASSET_TAG_SAPCCLOUD]
class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):
@@ -224,10 +226,10 @@ def detect_openstack(accept_oracle=False):
"""Return True when a potential OpenStack platform is detected."""
if not util.is_x86():
return True # Non-Intel cpus don't properly report dmi product names
- product_name = util.read_dmi_data('system-product-name')
+ product_name = dmi.read_dmi_data('system-product-name')
if product_name in VALID_DMI_PRODUCT_NAMES:
return True
- elif util.read_dmi_data('chassis-asset-tag') in VALID_DMI_ASSET_TAGS:
+ elif dmi.read_dmi_data('chassis-asset-tag') in VALID_DMI_ASSET_TAGS:
return True
elif accept_oracle and oracle._is_platform_viable():
return True
diff --git a/cloudinit/sources/DataSourceOracle.py b/cloudinit/sources/DataSourceOracle.py
index 20d6487d..bf81b10b 100644
--- a/cloudinit/sources/DataSourceOracle.py
+++ b/cloudinit/sources/DataSourceOracle.py
@@ -17,6 +17,7 @@ import base64
from collections import namedtuple
from contextlib import suppress as noop
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit import net, sources, util
from cloudinit.net import (
@@ -273,12 +274,12 @@ class DataSourceOracle(sources.DataSource):
def _read_system_uuid():
- sys_uuid = util.read_dmi_data('system-uuid')
+ sys_uuid = dmi.read_dmi_data('system-uuid')
return None if sys_uuid is None else sys_uuid.lower()
def _is_platform_viable():
- asset_tag = util.read_dmi_data('chassis-asset-tag')
+ asset_tag = dmi.read_dmi_data('chassis-asset-tag')
return asset_tag == CHASSIS_ASSET_TAG
diff --git a/cloudinit/sources/DataSourceRbxCloud.py b/cloudinit/sources/DataSourceRbxCloud.py
index e064c8d6..0b8994bf 100644
--- a/cloudinit/sources/DataSourceRbxCloud.py
+++ b/cloudinit/sources/DataSourceRbxCloud.py
@@ -71,11 +71,13 @@ def gratuitous_arp(items, distro):
def get_md():
- rbx_data = None
+ """Returns False (not found or error) or a dictionary with metadata."""
devices = set(
util.find_devs_with('LABEL=CLOUDMD') +
util.find_devs_with('LABEL=cloudmd')
)
+ if not devices:
+ return False
for device in devices:
try:
rbx_data = util.mount_cb(
@@ -84,17 +86,17 @@ def get_md():
mtype=['vfat', 'fat', 'msdosfs']
)
if rbx_data:
- break
+ return rbx_data
except OSError as err:
if err.errno != errno.ENOENT:
raise
except util.MountFailedError:
util.logexc(LOG, "Failed to mount %s when looking for user "
"data", device)
- if not rbx_data:
- util.logexc(LOG, "Failed to load metadata and userdata")
- return False
- return rbx_data
+
+ LOG.debug("Did not find RbxCloud data, searched devices: %s",
+ ",".join(devices))
+ return False
def generate_network_config(netadps):
@@ -223,6 +225,8 @@ class DataSourceRbxCloud(sources.DataSource):
is used to perform instance configuration.
"""
rbx_data = get_md()
+ if rbx_data is False:
+ return False
self.userdata_raw = rbx_data['userdata']
self.metadata = rbx_data['metadata']
self.gratuitous_arp = rbx_data['gratuitous_arp']
diff --git a/cloudinit/sources/DataSourceScaleway.py b/cloudinit/sources/DataSourceScaleway.py
index 83c2bf65..41be7665 100644
--- a/cloudinit/sources/DataSourceScaleway.py
+++ b/cloudinit/sources/DataSourceScaleway.py
@@ -25,6 +25,7 @@ import requests
from requests.packages.urllib3.connection import HTTPConnection
from requests.packages.urllib3.poolmanager import PoolManager
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit import sources
from cloudinit import url_helper
@@ -56,7 +57,7 @@ def on_scaleway():
* the initrd created the file /var/run/scaleway.
* "scaleway" is in the kernel cmdline.
"""
- vendor_name = util.read_dmi_data('system-manufacturer')
+ vendor_name = dmi.read_dmi_data('system-manufacturer')
if vendor_name == 'Scaleway':
return True
diff --git a/cloudinit/sources/DataSourceSmartOS.py b/cloudinit/sources/DataSourceSmartOS.py
index f1f903bc..fd292baa 100644
--- a/cloudinit/sources/DataSourceSmartOS.py
+++ b/cloudinit/sources/DataSourceSmartOS.py
@@ -30,6 +30,7 @@ import random
import re
import socket
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit import serial
from cloudinit import sources
@@ -767,7 +768,7 @@ def get_smartos_environ(uname_version=None, product_name=None):
return SMARTOS_ENV_LX_BRAND
if product_name is None:
- system_type = util.read_dmi_data("system-product-name")
+ system_type = dmi.read_dmi_data("system-product-name")
else:
system_type = product_name
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index c4d60fff..9dccc687 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -14,6 +14,7 @@ import json
import os
from collections import namedtuple
+from cloudinit import dmi
from cloudinit import importer
from cloudinit import log as logging
from cloudinit import net
@@ -809,7 +810,7 @@ def instance_id_matches_system_uuid(instance_id, field='system-uuid'):
if not instance_id:
return False
- dmi_value = util.read_dmi_data(field)
+ dmi_value = dmi.read_dmi_data(field)
if not dmi_value:
return False
return instance_id.lower() == dmi_value.lower()
diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py
index b968a96f..d3055d08 100755
--- a/cloudinit/sources/helpers/azure.py
+++ b/cloudinit/sources/helpers/azure.py
@@ -9,6 +9,7 @@ import struct
import time
import textwrap
import zlib
+from errno import ENOENT
from cloudinit.settings import CFG_BUILTIN
from cloudinit.net import dhcp
@@ -16,6 +17,7 @@ from cloudinit import stages
from cloudinit import temp_utils
from contextlib import contextmanager
from xml.etree import ElementTree
+from xml.sax.saxutils import escape
from cloudinit import subp
from cloudinit import url_helper
@@ -41,13 +43,19 @@ COMPRESSED_EVENT_TYPE = 'compressed'
# cloud-init.log files where the P95 of the file sizes was 537KB and the time
# consumed to dump 500KB file was (P95:76, P99:233, P99.9:1170) in ms
MAX_LOG_TO_KVP_LENGTH = 512000
-# Marker file to indicate whether cloud-init.log is pushed to KVP
-LOG_PUSHED_TO_KVP_MARKER_FILE = '/var/lib/cloud/data/log_pushed_to_kvp'
+# File to store the last byte of cloud-init.log that was pushed to KVP. This
+# file will be deleted with every VM reboot.
+LOG_PUSHED_TO_KVP_INDEX_FILE = '/run/cloud-init/log_pushed_to_kvp_index'
azure_ds_reporter = events.ReportEventStack(
name="azure-ds",
description="initialize reporter for azure ds",
reporting_enabled=True)
+DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE = (
+ 'The VM encountered an error during deployment. '
+ 'Please visit https://aka.ms/linuxprovisioningerror '
+ 'for more information on remediation.')
+
def azure_ds_telemetry_reporter(func):
def impl(*args, **kwargs):
@@ -180,12 +188,15 @@ def get_system_info():
return evt
-def report_diagnostic_event(str):
+def report_diagnostic_event(
+ msg: str, *, logger_func=None) -> events.ReportingEvent:
"""Report a diagnostic event"""
+ if callable(logger_func):
+ logger_func(msg)
evt = events.ReportingEvent(
DIAGNOSTIC_EVENT_TYPE, 'diagnostic message',
- str, events.DEFAULT_EVENT_ORIGIN)
- events.report_event(evt)
+ msg, events.DEFAULT_EVENT_ORIGIN)
+ events.report_event(evt, excluded_handler_types={"log"})
# return the event for unit testing purpose
return evt
@@ -211,27 +222,58 @@ def report_compressed_event(event_name, event_content):
def push_log_to_kvp(file_name=CFG_BUILTIN['def_log_file']):
"""Push a portion of cloud-init.log file or the whole file to KVP
based on the file size.
- If called more than once, it skips pushing the log file to KVP again."""
+    The first time this function is called after VM boot, it will push the last
+    n bytes of the log file such that n < MAX_LOG_TO_KVP_LENGTH.
+ If called again on the same boot, it continues from where it left off.
+ In addition to cloud-init.log, dmesg log will also be collected."""
- log_pushed_to_kvp = bool(os.path.isfile(LOG_PUSHED_TO_KVP_MARKER_FILE))
- if log_pushed_to_kvp:
- report_diagnostic_event("cloud-init.log is already pushed to KVP")
- return
+ start_index = get_last_log_byte_pushed_to_kvp_index()
LOG.debug("Dumping cloud-init.log file to KVP")
try:
with open(file_name, "rb") as f:
f.seek(0, os.SEEK_END)
- seek_index = max(f.tell() - MAX_LOG_TO_KVP_LENGTH, 0)
+ seek_index = max(f.tell() - MAX_LOG_TO_KVP_LENGTH, start_index)
report_diagnostic_event(
- "Dumping last {} bytes of cloud-init.log file to KVP".format(
- f.tell() - seek_index))
+ "Dumping last {0} bytes of cloud-init.log file to KVP starting"
+ " from index: {1}".format(f.tell() - seek_index, seek_index),
+ logger_func=LOG.debug)
f.seek(seek_index, os.SEEK_SET)
report_compressed_event("cloud-init.log", f.read())
- util.write_file(LOG_PUSHED_TO_KVP_MARKER_FILE, '')
+ util.write_file(LOG_PUSHED_TO_KVP_INDEX_FILE, str(f.tell()))
+ except Exception as ex:
+ report_diagnostic_event(
+ "Exception when dumping log file: %s" % repr(ex),
+ logger_func=LOG.warning)
+
+ LOG.debug("Dumping dmesg log to KVP")
+ try:
+ out, _ = subp.subp(['dmesg'], decode=False, capture=True)
+ report_compressed_event("dmesg", out)
except Exception as ex:
- report_diagnostic_event("Exception when dumping log file: %s" %
- repr(ex))
+ report_diagnostic_event(
+ "Exception when dumping dmesg log: %s" % repr(ex),
+ logger_func=LOG.warning)
+
+
+@azure_ds_telemetry_reporter
+def get_last_log_byte_pushed_to_kvp_index():
+ try:
+ with open(LOG_PUSHED_TO_KVP_INDEX_FILE, "r") as f:
+ return int(f.read())
+ except IOError as e:
+ if e.errno != ENOENT:
+ report_diagnostic_event("Reading LOG_PUSHED_TO_KVP_INDEX_FILE"
+ " failed: %s." % repr(e),
+ logger_func=LOG.warning)
+ except ValueError as e:
+ report_diagnostic_event("Invalid value in LOG_PUSHED_TO_KVP_INDEX_FILE"
+ ": %s." % repr(e),
+ logger_func=LOG.warning)
+ except Exception as e:
+ report_diagnostic_event("Failed to get the last log byte pushed to KVP"
+ ": %s." % repr(e), logger_func=LOG.warning)
+ return 0
@contextmanager
@@ -252,6 +294,54 @@ def _get_dhcp_endpoint_option_name():
return azure_endpoint
+@azure_ds_telemetry_reporter
+def http_with_retries(url, **kwargs) -> str:
+ """Wrapper around url_helper.readurl() with custom telemetry logging
+ that url_helper.readurl() does not provide.
+ """
+ exc = None
+
+ max_readurl_attempts = 240
+ default_readurl_timeout = 5
+ periodic_logging_attempts = 12
+
+ if 'timeout' not in kwargs:
+ kwargs['timeout'] = default_readurl_timeout
+
+ # remove kwargs that cause url_helper.readurl to retry,
+ # since we are already implementing our own retry logic.
+ if kwargs.pop('retries', None):
+ LOG.warning(
+ 'Ignoring retries kwarg passed in for '
+ 'communication with Azure endpoint.')
+ if kwargs.pop('infinite', None):
+ LOG.warning(
+ 'Ignoring infinite kwarg passed in for communication '
+ 'with Azure endpoint.')
+
+ for attempt in range(1, max_readurl_attempts + 1):
+ try:
+ ret = url_helper.readurl(url, **kwargs)
+
+ report_diagnostic_event(
+ 'Successful HTTP request with Azure endpoint %s after '
+ '%d attempts' % (url, attempt),
+ logger_func=LOG.debug)
+
+ return ret
+
+ except Exception as e:
+ exc = e
+ if attempt % periodic_logging_attempts == 0:
+ report_diagnostic_event(
+ 'Failed HTTP request with Azure endpoint %s during '
+ 'attempt %d with exception: %s' %
+ (url, attempt, e),
+ logger_func=LOG.debug)
+
+ raise exc
+
+
class AzureEndpointHttpClient:
headers = {
@@ -270,16 +360,15 @@ class AzureEndpointHttpClient:
if secure:
headers = self.headers.copy()
headers.update(self.extra_secure_headers)
- return url_helper.readurl(url, headers=headers,
- timeout=5, retries=10, sec_between=5)
+ return http_with_retries(url, headers=headers)
def post(self, url, data=None, extra_headers=None):
headers = self.headers
if extra_headers is not None:
headers = self.headers.copy()
headers.update(extra_headers)
- return url_helper.readurl(url, data=data, headers=headers,
- timeout=5, retries=10, sec_between=5)
+ return http_with_retries(
+ url, data=data, headers=headers)
class InvalidGoalStateXMLException(Exception):
@@ -288,11 +377,16 @@ class InvalidGoalStateXMLException(Exception):
class GoalState:
- def __init__(self, unparsed_xml, azure_endpoint_client):
+ def __init__(
+ self,
+ unparsed_xml: str,
+ azure_endpoint_client: AzureEndpointHttpClient,
+ need_certificate: bool = True) -> None:
"""Parses a GoalState XML string and returns a GoalState object.
@param unparsed_xml: string representing a GoalState XML.
- @param azure_endpoint_client: instance of AzureEndpointHttpClient
+ @param azure_endpoint_client: instance of AzureEndpointHttpClient.
+        @param need_certificate: switch to know if certificates are needed.
@return: GoalState object representing the GoalState XML string.
"""
self.azure_endpoint_client = azure_endpoint_client
@@ -300,9 +394,9 @@ class GoalState:
try:
self.root = ElementTree.fromstring(unparsed_xml)
except ElementTree.ParseError as e:
- msg = 'Failed to parse GoalState XML: %s'
- LOG.warning(msg, e)
- report_diagnostic_event(msg % (e,))
+ report_diagnostic_event(
+ 'Failed to parse GoalState XML: %s' % e,
+ logger_func=LOG.warning)
raise
self.container_id = self._text_from_xpath('./Container/ContainerId')
@@ -312,16 +406,15 @@ class GoalState:
for attr in ("container_id", "instance_id", "incarnation"):
if getattr(self, attr) is None:
- msg = 'Missing %s in GoalState XML'
- LOG.warning(msg, attr)
- report_diagnostic_event(msg % (attr,))
+ msg = 'Missing %s in GoalState XML' % attr
+ report_diagnostic_event(msg, logger_func=LOG.warning)
raise InvalidGoalStateXMLException(msg)
self.certificates_xml = None
url = self._text_from_xpath(
'./Container/RoleInstanceList/RoleInstance'
'/Configuration/Certificates')
- if url is not None:
+ if url is not None and need_certificate:
with events.ReportEventStack(
name="get-certificates-xml",
description="get certificates xml",
@@ -349,12 +442,20 @@ class OpenSSLManager:
def __init__(self):
self.tmpdir = temp_utils.mkdtemp()
- self.certificate = None
+ self._certificate = None
self.generate_certificate()
def clean_up(self):
util.del_dir(self.tmpdir)
+ @property
+ def certificate(self):
+ return self._certificate
+
+ @certificate.setter
+ def certificate(self, value):
+ self._certificate = value
+
@azure_ds_telemetry_reporter
def generate_certificate(self):
LOG.debug('Generating certificate for communication with fabric...')
@@ -477,8 +578,15 @@ class GoalStateHealthReporter:
''')
PROVISIONING_SUCCESS_STATUS = 'Ready'
+ PROVISIONING_NOT_READY_STATUS = 'NotReady'
+ PROVISIONING_FAILURE_SUBSTATUS = 'ProvisioningFailed'
+
+ HEALTH_REPORT_DESCRIPTION_TRIM_LEN = 512
- def __init__(self, goal_state, azure_endpoint_client, endpoint):
+ def __init__(
+ self, goal_state: GoalState,
+ azure_endpoint_client: AzureEndpointHttpClient,
+ endpoint: str) -> None:
"""Creates instance that will report provisioning status to an endpoint
@param goal_state: An instance of class GoalState that contains
@@ -495,7 +603,7 @@ class GoalStateHealthReporter:
self._endpoint = endpoint
@azure_ds_telemetry_reporter
- def send_ready_signal(self):
+ def send_ready_signal(self) -> None:
document = self.build_report(
incarnation=self._goal_state.incarnation,
container_id=self._goal_state.container_id,
@@ -505,32 +613,52 @@ class GoalStateHealthReporter:
try:
self._post_health_report(document=document)
except Exception as e:
- msg = "exception while reporting ready: %s" % e
- LOG.error(msg)
- report_diagnostic_event(msg)
+ report_diagnostic_event(
+ "exception while reporting ready: %s" % e,
+ logger_func=LOG.error)
raise
LOG.info('Reported ready to Azure fabric.')
+ @azure_ds_telemetry_reporter
+ def send_failure_signal(self, description: str) -> None:
+ document = self.build_report(
+ incarnation=self._goal_state.incarnation,
+ container_id=self._goal_state.container_id,
+ instance_id=self._goal_state.instance_id,
+ status=self.PROVISIONING_NOT_READY_STATUS,
+ substatus=self.PROVISIONING_FAILURE_SUBSTATUS,
+ description=description)
+ try:
+ self._post_health_report(document=document)
+ except Exception as e:
+ msg = "exception while reporting failure: %s" % e
+ report_diagnostic_event(msg, logger_func=LOG.error)
+ raise
+
+ LOG.warning('Reported failure to Azure fabric.')
+
def build_report(
- self, incarnation, container_id, instance_id,
- status, substatus=None, description=None):
+ self, incarnation: str, container_id: str, instance_id: str,
+ status: str, substatus=None, description=None) -> str:
health_detail = ''
if substatus is not None:
health_detail = self.HEALTH_DETAIL_SUBSECTION_XML_TEMPLATE.format(
- health_substatus=substatus, health_description=description)
+ health_substatus=escape(substatus),
+ health_description=escape(
+ description[:self.HEALTH_REPORT_DESCRIPTION_TRIM_LEN]))
health_report = self.HEALTH_REPORT_XML_TEMPLATE.format(
- incarnation=incarnation,
- container_id=container_id,
- instance_id=instance_id,
- health_status=status,
+ incarnation=escape(str(incarnation)),
+ container_id=escape(container_id),
+ instance_id=escape(instance_id),
+ health_status=escape(status),
health_detail_subsection=health_detail)
return health_report
@azure_ds_telemetry_reporter
- def _post_health_report(self, document):
+ def _post_health_report(self, document: str) -> None:
push_log_to_kvp()
# Whenever report_diagnostic_event(diagnostic_msg) is invoked in code,
@@ -690,43 +818,52 @@ class WALinuxAgentShim:
value = dhcp245
LOG.debug("Using Azure Endpoint from dhcp options")
if value is None:
- report_diagnostic_event("No Azure endpoint from dhcp options")
- LOG.debug('Finding Azure endpoint from networkd...')
+ report_diagnostic_event(
+ 'No Azure endpoint from dhcp options. '
+ 'Finding Azure endpoint from networkd...',
+ logger_func=LOG.debug)
value = WALinuxAgentShim._networkd_get_value_from_leases()
if value is None:
# Option-245 stored in /run/cloud-init/dhclient.hooks/<ifc>.json
# a dhclient exit hook that calls cloud-init-dhclient-hook
- report_diagnostic_event("No Azure endpoint from networkd")
- LOG.debug('Finding Azure endpoint from hook json...')
+ report_diagnostic_event(
+ 'No Azure endpoint from networkd. '
+ 'Finding Azure endpoint from hook json...',
+ logger_func=LOG.debug)
dhcp_options = WALinuxAgentShim._load_dhclient_json()
value = WALinuxAgentShim._get_value_from_dhcpoptions(dhcp_options)
if value is None:
# Fallback and check the leases file if unsuccessful
- report_diagnostic_event("No Azure endpoint from dhclient logs")
- LOG.debug("Unable to find endpoint in dhclient logs. "
- " Falling back to check lease files")
+ report_diagnostic_event(
+ 'No Azure endpoint from dhclient logs. '
+ 'Unable to find endpoint in dhclient logs. '
+ 'Falling back to check lease files',
+ logger_func=LOG.debug)
if fallback_lease_file is None:
- LOG.warning("No fallback lease file was specified.")
+ report_diagnostic_event(
+ 'No fallback lease file was specified.',
+ logger_func=LOG.warning)
value = None
else:
- LOG.debug("Looking for endpoint in lease file %s",
- fallback_lease_file)
+ report_diagnostic_event(
+ 'Looking for endpoint in lease file %s'
+ % fallback_lease_file, logger_func=LOG.debug)
value = WALinuxAgentShim._get_value_from_leases_file(
fallback_lease_file)
if value is None:
- msg = "No lease found; using default endpoint"
- report_diagnostic_event(msg)
- LOG.warning(msg)
value = DEFAULT_WIRESERVER_ENDPOINT
+ report_diagnostic_event(
+ 'No lease found; using default endpoint: %s' % value,
+ logger_func=LOG.warning)
endpoint_ip_address = WALinuxAgentShim.get_ip_from_lease_value(value)
- msg = 'Azure endpoint found at %s' % endpoint_ip_address
- report_diagnostic_event(msg)
- LOG.debug(msg)
+ report_diagnostic_event(
+ 'Azure endpoint found at %s' % endpoint_ip_address,
+ logger_func=LOG.debug)
return endpoint_ip_address
@azure_ds_telemetry_reporter
- def register_with_azure_and_fetch_data(self, pubkey_info=None):
+ def register_with_azure_and_fetch_data(self, pubkey_info=None) -> dict:
"""Gets the VM's GoalState from Azure, uses the GoalState information
to report ready/send the ready signal/provisioning complete signal to
Azure, and then uses pubkey_info to filter and obtain the user's
@@ -737,30 +874,56 @@ class WALinuxAgentShim:
GoalState.
@return: The list of user's authorized pubkey values.
"""
- if self.openssl_manager is None:
+ http_client_certificate = None
+ if self.openssl_manager is None and pubkey_info is not None:
self.openssl_manager = OpenSSLManager()
+ http_client_certificate = self.openssl_manager.certificate
if self.azure_endpoint_client is None:
self.azure_endpoint_client = AzureEndpointHttpClient(
- self.openssl_manager.certificate)
- goal_state = self._fetch_goal_state_from_azure()
- ssh_keys = self._get_user_pubkeys(goal_state, pubkey_info)
+ http_client_certificate)
+ goal_state = self._fetch_goal_state_from_azure(
+ need_certificate=http_client_certificate is not None
+ )
+ ssh_keys = None
+ if pubkey_info is not None:
+ ssh_keys = self._get_user_pubkeys(goal_state, pubkey_info)
health_reporter = GoalStateHealthReporter(
goal_state, self.azure_endpoint_client, self.endpoint)
health_reporter.send_ready_signal()
return {'public-keys': ssh_keys}
@azure_ds_telemetry_reporter
- def _fetch_goal_state_from_azure(self):
+ def register_with_azure_and_report_failure(self, description: str) -> None:
+ """Gets the VM's GoalState from Azure, uses the GoalState information
+ to report failure/send provisioning failure signal to Azure.
+
+    @param description: user-visible error description of provisioning failure.
+ """
+ if self.azure_endpoint_client is None:
+ self.azure_endpoint_client = AzureEndpointHttpClient(None)
+ goal_state = self._fetch_goal_state_from_azure(need_certificate=False)
+ health_reporter = GoalStateHealthReporter(
+ goal_state, self.azure_endpoint_client, self.endpoint)
+ health_reporter.send_failure_signal(description=description)
+
+ @azure_ds_telemetry_reporter
+ def _fetch_goal_state_from_azure(
+ self,
+ need_certificate: bool) -> GoalState:
"""Fetches the GoalState XML from the Azure endpoint, parses the XML,
and returns a GoalState object.
+        @param need_certificate: switch to know if certificates are needed.
@return: GoalState object representing the GoalState XML
"""
unparsed_goal_state_xml = self._get_raw_goal_state_xml_from_azure()
- return self._parse_raw_goal_state_xml(unparsed_goal_state_xml)
+ return self._parse_raw_goal_state_xml(
+ unparsed_goal_state_xml,
+ need_certificate
+ )
@azure_ds_telemetry_reporter
- def _get_raw_goal_state_xml_from_azure(self):
+ def _get_raw_goal_state_xml_from_azure(self) -> str:
"""Fetches the GoalState XML from the Azure endpoint and returns
the XML as a string.
@@ -770,40 +933,51 @@ class WALinuxAgentShim:
LOG.info('Registering with Azure...')
url = 'http://{}/machine/?comp=goalstate'.format(self.endpoint)
try:
- response = self.azure_endpoint_client.get(url)
+ with events.ReportEventStack(
+ name="goalstate-retrieval",
+ description="retrieve goalstate",
+ parent=azure_ds_reporter):
+ response = self.azure_endpoint_client.get(url)
except Exception as e:
- msg = 'failed to register with Azure: %s' % e
- LOG.warning(msg)
- report_diagnostic_event(msg)
+ report_diagnostic_event(
+ 'failed to register with Azure and fetch GoalState XML: %s'
+ % e, logger_func=LOG.warning)
raise
LOG.debug('Successfully fetched GoalState XML.')
return response.contents
@azure_ds_telemetry_reporter
- def _parse_raw_goal_state_xml(self, unparsed_goal_state_xml):
+ def _parse_raw_goal_state_xml(
+ self,
+ unparsed_goal_state_xml: str,
+ need_certificate: bool) -> GoalState:
"""Parses a GoalState XML string and returns a GoalState object.
@param unparsed_goal_state_xml: GoalState XML string
+        @param need_certificate: switch to know if certificates are needed.
@return: GoalState object representing the GoalState XML
"""
try:
goal_state = GoalState(
- unparsed_goal_state_xml, self.azure_endpoint_client)
+ unparsed_goal_state_xml,
+ self.azure_endpoint_client,
+ need_certificate
+ )
except Exception as e:
- msg = 'Error processing GoalState XML: %s' % e
- LOG.warning(msg)
- report_diagnostic_event(msg)
+ report_diagnostic_event(
+ 'Error processing GoalState XML: %s' % e,
+ logger_func=LOG.warning)
raise
msg = ', '.join([
'GoalState XML container id: %s' % goal_state.container_id,
'GoalState XML instance id: %s' % goal_state.instance_id,
'GoalState XML incarnation: %s' % goal_state.incarnation])
- LOG.debug(msg)
- report_diagnostic_event(msg)
+ report_diagnostic_event(msg, logger_func=LOG.debug)
return goal_state
@azure_ds_telemetry_reporter
- def _get_user_pubkeys(self, goal_state, pubkey_info):
+ def _get_user_pubkeys(
+ self, goal_state: GoalState, pubkey_info: list) -> list:
"""Gets and filters the VM admin user's authorized pubkeys.
The admin user in this case is the username specified as "admin"
@@ -838,7 +1012,7 @@ class WALinuxAgentShim:
return ssh_keys
@staticmethod
- def _filter_pubkeys(keys_by_fingerprint, pubkey_info):
+ def _filter_pubkeys(keys_by_fingerprint: dict, pubkey_info: list) -> list:
""" Filter and return only the user's actual pubkeys.
@param keys_by_fingerprint: pubkey fingerprint -> pubkey value dict
@@ -879,9 +1053,25 @@ def get_metadata_from_fabric(fallback_lease_file=None, dhcp_opts=None,
shim.clean_up()
+@azure_ds_telemetry_reporter
+def report_failure_to_fabric(fallback_lease_file=None, dhcp_opts=None,
+ description=None):
+ shim = WALinuxAgentShim(fallback_lease_file=fallback_lease_file,
+ dhcp_options=dhcp_opts)
+ if not description:
+ description = DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE
+ try:
+ shim.register_with_azure_and_report_failure(
+ description=description)
+ finally:
+ shim.clean_up()
+
+
def dhcp_log_cb(out, err):
- report_diagnostic_event("dhclient output stream: %s" % out)
- report_diagnostic_event("dhclient error stream: %s" % err)
+ report_diagnostic_event(
+ "dhclient output stream: %s" % out, logger_func=LOG.debug)
+ report_diagnostic_event(
+ "dhclient error stream: %s" % err, logger_func=LOG.debug)
class EphemeralDHCPv4WithReporting:
diff --git a/cloudinit/sources/helpers/digitalocean.py b/cloudinit/sources/helpers/digitalocean.py
index b545c4d6..f9be4ecb 100644
--- a/cloudinit/sources/helpers/digitalocean.py
+++ b/cloudinit/sources/helpers/digitalocean.py
@@ -5,6 +5,7 @@
import json
import random
+from cloudinit import dmi
from cloudinit import log as logging
from cloudinit import net as cloudnet
from cloudinit import url_helper
@@ -195,11 +196,11 @@ def read_sysinfo():
# SMBIOS information
# Detect if we are on DigitalOcean and return the Droplet's ID
- vendor_name = util.read_dmi_data("system-manufacturer")
+ vendor_name = dmi.read_dmi_data("system-manufacturer")
if vendor_name != "DigitalOcean":
return (False, None)
- droplet_id = util.read_dmi_data("system-serial-number")
+ droplet_id = dmi.read_dmi_data("system-serial-number")
if droplet_id:
LOG.debug("system identified via SMBIOS as DigitalOcean Droplet: %s",
droplet_id)
diff --git a/cloudinit/sources/helpers/hetzner.py b/cloudinit/sources/helpers/hetzner.py
index 72edb023..33dc4c53 100644
--- a/cloudinit/sources/helpers/hetzner.py
+++ b/cloudinit/sources/helpers/hetzner.py
@@ -3,15 +3,12 @@
#
# This file is part of cloud-init. See LICENSE file for license information.
-from cloudinit import log as logging
from cloudinit import url_helper
from cloudinit import util
import base64
import binascii
-LOG = logging.getLogger(__name__)
-
def read_metadata(url, timeout=2, sec_between=2, retries=30):
response = url_helper.readurl(url, timeout=timeout,
diff --git a/cloudinit/sources/helpers/netlink.py b/cloudinit/sources/helpers/netlink.py
index c2ad587b..e13d6834 100644
--- a/cloudinit/sources/helpers/netlink.py
+++ b/cloudinit/sources/helpers/netlink.py
@@ -185,6 +185,54 @@ def read_rta_oper_state(data):
return InterfaceOperstate(ifname, operstate)
+def wait_for_nic_attach_event(netlink_socket, existing_nics):
+ '''Block until a single nic is attached.
+
+ :param: netlink_socket: netlink_socket to receive events
+ :param: existing_nics: List of existing nics so that we can skip them.
+ :raises: AssertionError if netlink_socket is none.
+ '''
+ LOG.debug("Preparing to wait for nic attach.")
+ ifname = None
+
+ def should_continue_cb(iname, carrier, prevCarrier):
+ if iname in existing_nics:
+ return True
+ nonlocal ifname
+ ifname = iname
+ return False
+
+ # We can return even if the operational state of the new nic is DOWN
+ # because we set it to UP before doing dhcp.
+ read_netlink_messages(netlink_socket,
+ None,
+ [RTM_NEWLINK],
+ [OPER_UP, OPER_DOWN],
+ should_continue_cb)
+ return ifname
+
+
+def wait_for_nic_detach_event(netlink_socket):
+ '''Block until a single nic is detached and its operational state is down.
+
+ :param: netlink_socket: netlink_socket to receive events.
+ '''
+ LOG.debug("Preparing to wait for nic detach.")
+ ifname = None
+
+ def should_continue_cb(iname, carrier, prevCarrier):
+ nonlocal ifname
+ ifname = iname
+ return False
+
+ read_netlink_messages(netlink_socket,
+ None,
+ [RTM_DELLINK],
+ [OPER_DOWN],
+ should_continue_cb)
+ return ifname
+
+
def wait_for_media_disconnect_connect(netlink_socket, ifname):
'''Block until media disconnect and connect has happened on an interface.
Listens on netlink socket to receive netlink events and when the carrier
@@ -198,10 +246,42 @@ def wait_for_media_disconnect_connect(netlink_socket, ifname):
assert (netlink_socket is not None), ("netlink socket is none")
assert (ifname is not None), ("interface name is none")
assert (len(ifname) > 0), ("interface name cannot be empty")
+
+ def should_continue_cb(iname, carrier, prevCarrier):
+ # check for carrier down, up sequence
+ isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP)
+ if isVnetSwitch:
+ LOG.debug("Media switch happened on %s.", ifname)
+ return False
+ return True
+
+ LOG.debug("Wait for media disconnect and reconnect to happen")
+ read_netlink_messages(netlink_socket,
+ ifname,
+ [RTM_NEWLINK, RTM_DELLINK],
+ [OPER_UP, OPER_DOWN],
+ should_continue_cb)
+
+
+def read_netlink_messages(netlink_socket,
+ ifname_filter,
+ rtm_types,
+ operstates,
+ should_continue_callback):
+ ''' Reads from the netlink socket until the condition specified by
+ the continuation callback is met.
+
+ :param: netlink_socket: netlink_socket to receive events.
+ :param: ifname_filter: if not None, will only listen for this interface.
+ :param: rtm_types: Type of netlink events to listen for.
+    :param: operstates: Operational states to listen for.
+ :param: should_continue_callback: Specifies when to stop listening.
+ '''
+ if netlink_socket is None:
+ raise RuntimeError("Netlink socket is none")
+ data = bytes()
carrier = OPER_UP
prevCarrier = OPER_UP
- data = bytes()
- LOG.debug("Wait for media disconnect and reconnect to happen")
while True:
recv_data = read_netlink_socket(netlink_socket, SELECT_TIMEOUT)
if recv_data is None:
@@ -223,26 +303,26 @@ def wait_for_media_disconnect_connect(netlink_socket, ifname):
padlen = (nlheader.length+PAD_ALIGNMENT-1) & ~(PAD_ALIGNMENT-1)
offset = offset + padlen
LOG.debug('offset to next netlink message: %d', offset)
- # Ignore any messages not new link or del link
- if nlheader.type not in [RTM_NEWLINK, RTM_DELLINK]:
+ # Continue if we are not interested in this message.
+ if nlheader.type not in rtm_types:
continue
interface_state = read_rta_oper_state(nl_msg)
if interface_state is None:
LOG.debug('Failed to read rta attributes: %s', interface_state)
continue
- if interface_state.ifname != ifname:
+ if (ifname_filter is not None and
+ interface_state.ifname != ifname_filter):
LOG.debug(
"Ignored netlink event on interface %s. Waiting for %s.",
- interface_state.ifname, ifname)
+ interface_state.ifname, ifname_filter)
continue
- if interface_state.operstate not in [OPER_UP, OPER_DOWN]:
+ if interface_state.operstate not in operstates:
continue
prevCarrier = carrier
carrier = interface_state.operstate
- # check for carrier down, up sequence
- isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP)
- if isVnetSwitch:
- LOG.debug("Media switch happened on %s.", ifname)
+ if not should_continue_callback(interface_state.ifname,
+ carrier,
+ prevCarrier):
return
data = data[offset:]
diff --git a/cloudinit/sources/helpers/openstack.py b/cloudinit/sources/helpers/openstack.py
index 65e020c5..3e6365f1 100644
--- a/cloudinit/sources/helpers/openstack.py
+++ b/cloudinit/sources/helpers/openstack.py
@@ -602,11 +602,17 @@ def convert_net_json(network_json=None, known_macs=None):
elif network['type'] in ['ipv6_slaac', 'ipv6_dhcpv6-stateless',
'ipv6_dhcpv6-stateful']:
subnet.update({'type': network['type']})
- elif network['type'] in ['ipv4', 'ipv6']:
+ elif network['type'] in ['ipv4', 'static']:
subnet.update({
'type': 'static',
'address': network.get('ip_address'),
})
+ elif network['type'] in ['ipv6', 'static6']:
+ cfg.update({'accept-ra': False})
+ subnet.update({
+ 'type': 'static6',
+ 'address': network.get('ip_address'),
+ })
# Enable accept_ra for stateful and legacy ipv6_dhcp types
if network['type'] in ['ipv6_dhcpv6-stateful', 'ipv6_dhcp']:
diff --git a/cloudinit/sources/helpers/tests/test_netlink.py b/cloudinit/sources/helpers/tests/test_netlink.py
index 10760bd6..cafe3961 100644
--- a/cloudinit/sources/helpers/tests/test_netlink.py
+++ b/cloudinit/sources/helpers/tests/test_netlink.py
@@ -9,9 +9,10 @@ import codecs
from cloudinit.sources.helpers.netlink import (
NetlinkCreateSocketError, create_bound_netlink_socket, read_netlink_socket,
read_rta_oper_state, unpack_rta_attr, wait_for_media_disconnect_connect,
+ wait_for_nic_attach_event, wait_for_nic_detach_event,
OPER_DOWN, OPER_UP, OPER_DORMANT, OPER_LOWERLAYERDOWN, OPER_NOTPRESENT,
- OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_SETLINK,
- RTM_GETLINK, MAX_SIZE)
+ OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_DELLINK,
+ RTM_SETLINK, RTM_GETLINK, MAX_SIZE)
def int_to_bytes(i):
@@ -135,6 +136,75 @@ class TestParseNetlinkMessage(CiTestCase):
@mock.patch('cloudinit.sources.helpers.netlink.socket.socket')
@mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket')
+class TestNicAttachDetach(CiTestCase):
+ with_logs = True
+
+ def _media_switch_data(self, ifname, msg_type, operstate):
+ '''construct netlink data with specified fields'''
+ if ifname and operstate is not None:
+ data = bytearray(48)
+ bytes = ifname.encode("utf-8")
+ struct.pack_into("HH4sHHc", data, RTATTR_START_OFFSET, 8, 3,
+ bytes, 5, 16, int_to_bytes(operstate))
+ elif ifname:
+ data = bytearray(40)
+ bytes = ifname.encode("utf-8")
+ struct.pack_into("HH4s", data, RTATTR_START_OFFSET, 8, 3, bytes)
+ elif operstate:
+ data = bytearray(40)
+ struct.pack_into("HHc", data, RTATTR_START_OFFSET, 5, 16,
+ int_to_bytes(operstate))
+ struct.pack_into("=LHHLL", data, 0, len(data), msg_type, 0, 0, 0)
+ return data
+
+ def test_nic_attached_oper_down(self, m_read_netlink_socket, m_socket):
+ '''Test for a new nic attached'''
+ ifname = "eth0"
+ data_op_down = self._media_switch_data(ifname, RTM_NEWLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_op_down]
+ ifread = wait_for_nic_attach_event(m_socket, [])
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual(ifname, ifread)
+
+ def test_nic_attached_oper_up(self, m_read_netlink_socket, m_socket):
+ '''Test for a new nic attached'''
+ ifname = "eth0"
+ data_op_up = self._media_switch_data(ifname, RTM_NEWLINK, OPER_UP)
+ m_read_netlink_socket.side_effect = [data_op_up]
+ ifread = wait_for_nic_attach_event(m_socket, [])
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual(ifname, ifread)
+
+ def test_nic_attach_ignore_existing(self, m_read_netlink_socket, m_socket):
+ '''Test that we read only the interfaces we are interested in.'''
+ data_eth0 = self._media_switch_data("eth0", RTM_NEWLINK, OPER_DOWN)
+ data_eth1 = self._media_switch_data("eth1", RTM_NEWLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_eth0, data_eth1]
+ ifread = wait_for_nic_attach_event(m_socket, ["eth0"])
+ self.assertEqual(m_read_netlink_socket.call_count, 2)
+ self.assertEqual("eth1", ifread)
+
+ def test_nic_attach_read_first(self, m_read_netlink_socket, m_socket):
+        '''Test that we return the first attach event seen for a new nic.'''
+ data_eth0 = self._media_switch_data("eth0", RTM_NEWLINK, OPER_DOWN)
+ data_eth1 = self._media_switch_data("eth1", RTM_NEWLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_eth0, data_eth1]
+ ifread = wait_for_nic_attach_event(m_socket, ["eth1"])
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual("eth0", ifread)
+
+ def test_nic_detached(self, m_read_netlink_socket, m_socket):
+ '''Test for an existing nic detached'''
+ ifname = "eth0"
+ data_op_down = self._media_switch_data(ifname, RTM_DELLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_op_down]
+ ifread = wait_for_nic_detach_event(m_socket)
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual(ifname, ifread)
+
+
+@mock.patch('cloudinit.sources.helpers.netlink.socket.socket')
+@mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket')
class TestWaitForMediaDisconnectConnect(CiTestCase):
with_logs = True
diff --git a/cloudinit/sources/helpers/vmware/imc/config_nic.py b/cloudinit/sources/helpers/vmware/imc/config_nic.py
index 3745a262..9cd2c0c0 100644
--- a/cloudinit/sources/helpers/vmware/imc/config_nic.py
+++ b/cloudinit/sources/helpers/vmware/imc/config_nic.py
@@ -275,6 +275,7 @@ class NicConfigurator(object):
"# DO NOT EDIT THIS FILE BY HAND --"
" AUTOMATICALLY GENERATED BY cloud-init",
"source /etc/network/interfaces.d/*.cfg",
+ "source-directory /etc/network/interfaces.d",
]
util.write_file(interfaceFile, content='\n'.join(lines))
diff --git a/cloudinit/sources/tests/test_oracle.py b/cloudinit/sources/tests/test_oracle.py
index 7bd23813..a7bbdfd9 100644
--- a/cloudinit/sources/tests/test_oracle.py
+++ b/cloudinit/sources/tests/test_oracle.py
@@ -153,20 +153,20 @@ class TestDataSourceOracle:
class TestIsPlatformViable(test_helpers.CiTestCase):
- @mock.patch(DS_PATH + ".util.read_dmi_data",
+ @mock.patch(DS_PATH + ".dmi.read_dmi_data",
return_value=oracle.CHASSIS_ASSET_TAG)
def test_expected_viable(self, m_read_dmi_data):
"""System with known chassis tag is viable."""
self.assertTrue(oracle._is_platform_viable())
m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')])
- @mock.patch(DS_PATH + ".util.read_dmi_data", return_value=None)
+ @mock.patch(DS_PATH + ".dmi.read_dmi_data", return_value=None)
def test_expected_not_viable_dmi_data_none(self, m_read_dmi_data):
"""System without known chassis tag is not viable."""
self.assertFalse(oracle._is_platform_viable())
m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')])
- @mock.patch(DS_PATH + ".util.read_dmi_data", return_value="LetsGoCubs")
+ @mock.patch(DS_PATH + ".dmi.read_dmi_data", return_value="LetsGoCubs")
def test_expected_not_viable_other(self, m_read_dmi_data):
"""System with unnown chassis tag is not viable."""
self.assertFalse(oracle._is_platform_viable())