summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraswinrajamannar <39812128+aswinrajamannar@users.noreply.github.com>2020-11-23 07:04:05 -0800
committerGitHub <noreply@github.com>2020-11-23 10:04:05 -0500
commita4d0feb050e32277a218e45bfb6a496d26ff46d0 (patch)
tree505204ef29709f7e1eda97923c04fc8eae72edb0
parent66b4be8b6da188a0667bd8c86a25155b6f4f3f6c (diff)
downloadvyos-cloud-init-a4d0feb050e32277a218e45bfb6a496d26ff46d0.tar.gz
vyos-cloud-init-a4d0feb050e32277a218e45bfb6a496d26ff46d0.zip
Ability to hot-attach NICs to preprovisioned VMs before reprovisioning (#613)
Adds the ability to run the Azure preprovisioned VMs as NIC-less and then hot-attach them when assigned for reprovision. The NIC on the preprovisioned VM is hot-detached as soon as it reports ready and goes into wait for one or more interfaces to be hot-attached. Once they are attached, cloud-init gets the expected number of NICs (in case there are more than one) that will be attached from IMDS and waits until all of them are attached. After all the NICs are attached, reprovision proceeds as usual.
-rw-r--r--cloudinit/distros/networking.py15
-rw-r--r--cloudinit/distros/tests/test_networking.py31
-rwxr-xr-xcloudinit/sources/DataSourceAzure.py515
-rw-r--r--cloudinit/sources/helpers/netlink.py102
-rw-r--r--cloudinit/sources/helpers/tests/test_netlink.py74
-rw-r--r--tests/unittests/test_datasource/test_azure.py436
-rw-r--r--tools/.github-cla-signers1
7 files changed, 1109 insertions, 65 deletions
diff --git a/cloudinit/distros/networking.py b/cloudinit/distros/networking.py
index e407fa29..c291196a 100644
--- a/cloudinit/distros/networking.py
+++ b/cloudinit/distros/networking.py
@@ -2,6 +2,7 @@ import abc
import logging
import os
+from cloudinit import subp
from cloudinit import net, util
@@ -175,6 +176,10 @@ class Networking(metaclass=abc.ABCMeta):
if strict:
raise RuntimeError(msg)
+ @abc.abstractmethod
+ def try_set_link_up(self, devname: DeviceName) -> bool:
+ """Try setting the link to up explicitly and return if it is up."""
+
class BSDNetworking(Networking):
"""Implementation of networking functionality shared across BSDs."""
@@ -185,6 +190,9 @@ class BSDNetworking(Networking):
def settle(self, *, exists=None) -> None:
"""BSD has no equivalent to `udevadm settle`; noop."""
+ def try_set_link_up(self, devname: DeviceName) -> bool:
+ raise NotImplementedError()
+
class LinuxNetworking(Networking):
"""Implementation of networking functionality common to Linux distros."""
@@ -214,3 +222,10 @@ class LinuxNetworking(Networking):
if exists is not None:
exists = net.sys_dev_path(exists)
util.udevadm_settle(exists=exists)
+
+ def try_set_link_up(self, devname: DeviceName) -> bool:
+ """Try setting the link to up explicitly and return if it is up.
+ Not guaranteed to bring the interface up. The caller is expected to
+ add wait times before retrying."""
+ subp.subp(['ip', 'link', 'set', devname, 'up'])
+ return self.is_up(devname)
diff --git a/cloudinit/distros/tests/test_networking.py b/cloudinit/distros/tests/test_networking.py
index b9a63842..ec508f4d 100644
--- a/cloudinit/distros/tests/test_networking.py
+++ b/cloudinit/distros/tests/test_networking.py
@@ -30,6 +30,9 @@ def generic_networking_cls():
def settle(self, *args, **kwargs):
raise NotImplementedError
+ def try_set_link_up(self, *args, **kwargs):
+ raise NotImplementedError
+
error = AssertionError("Unexpectedly used /sys in generic networking code")
with mock.patch(
"cloudinit.net.get_sys_class_path", side_effect=error,
@@ -74,6 +77,34 @@ class TestLinuxNetworkingIsPhysical:
assert LinuxNetworking().is_physical(devname)
+class TestBSDNetworkingTrySetLinkUp:
+ def test_raises_notimplementederror(self):
+ with pytest.raises(NotImplementedError):
+ BSDNetworking().try_set_link_up("eth0")
+
+
+@mock.patch("cloudinit.net.is_up")
+@mock.patch("cloudinit.distros.networking.subp.subp")
+class TestLinuxNetworkingTrySetLinkUp:
+ def test_calls_subp_return_true(self, m_subp, m_is_up):
+ devname = "eth0"
+ m_is_up.return_value = True
+ is_success = LinuxNetworking().try_set_link_up(devname)
+
+ assert (mock.call(['ip', 'link', 'set', devname, 'up']) ==
+ m_subp.call_args_list[-1])
+ assert is_success
+
+ def test_calls_subp_return_false(self, m_subp, m_is_up):
+ devname = "eth0"
+ m_is_up.return_value = False
+ is_success = LinuxNetworking().try_set_link_up(devname)
+
+ assert (mock.call(['ip', 'link', 'set', devname, 'up']) ==
+ m_subp.call_args_list[-1])
+ assert not is_success
+
+
class TestBSDNetworkingSettle:
def test_settle_doesnt_error(self):
# This also implicitly tests that it doesn't use subp.subp
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index f777a007..04ff2131 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -12,8 +12,10 @@ import os
import os.path
import re
from time import time
+from time import sleep
from xml.dom import minidom
import xml.etree.ElementTree as ET
+from enum import Enum
from cloudinit import dmi
from cloudinit import log as logging
@@ -67,13 +69,27 @@ DEFAULT_FS = 'ext4'
# DMI chassis-asset-tag is set static for all azure instances
AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77'
REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds"
+REPROVISION_NIC_ATTACH_MARKER_FILE = "/var/lib/cloud/data/wait_for_nic_attach"
+REPROVISION_NIC_DETACHED_MARKER_FILE = "/var/lib/cloud/data/nic_detached"
REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready"
AGENT_SEED_DIR = '/var/lib/waagent'
+
# In the event where the IMDS primary server is not
# available, it takes 1s to fallback to the secondary one
IMDS_TIMEOUT_IN_SECONDS = 2
IMDS_URL = "http://169.254.169.254/metadata/"
+IMDS_VER = "2019-06-01"
+IMDS_VER_PARAM = "api-version={}".format(IMDS_VER)
+
+
+class metadata_type(Enum):
+ compute = "{}instance?{}".format(IMDS_URL, IMDS_VER_PARAM)
+ network = "{}instance/network?{}".format(IMDS_URL,
+ IMDS_VER_PARAM)
+ reprovisiondata = "{}reprovisiondata?{}".format(IMDS_URL,
+ IMDS_VER_PARAM)
+
PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0"
@@ -434,20 +450,39 @@ class DataSourceAzure(sources.DataSource):
# need to look in the datadir and consider that valid
ddir = self.ds_cfg['data_dir']
+ # The order in which the candidates are inserted matters here, because
+ # it determines the value of ret. More specifically, the first one in
+ # the candidate list determines the path to take in order to get the
+ # metadata we need.
candidates = [self.seed_dir]
if os.path.isfile(REPROVISION_MARKER_FILE):
candidates.insert(0, "IMDS")
+ report_diagnostic_event("Reprovision marker file already present "
+ "before crawling Azure metadata: %s" %
+ REPROVISION_MARKER_FILE,
+ logger_func=LOG.debug)
+ elif os.path.isfile(REPROVISION_NIC_ATTACH_MARKER_FILE):
+ candidates.insert(0, "NIC_ATTACH_MARKER_PRESENT")
+ report_diagnostic_event("Reprovision nic attach marker file "
+ "already present before crawling Azure "
+ "metadata: %s" %
+ REPROVISION_NIC_ATTACH_MARKER_FILE,
+ logger_func=LOG.debug)
candidates.extend(list_possible_azure_ds_devs())
if ddir:
candidates.append(ddir)
found = None
reprovision = False
+ reprovision_after_nic_attach = False
for cdev in candidates:
try:
if cdev == "IMDS":
ret = None
reprovision = True
+ elif cdev == "NIC_ATTACH_MARKER_PRESENT":
+ ret = None
+ reprovision_after_nic_attach = True
elif cdev.startswith("/dev/"):
if util.is_FreeBSD():
ret = util.mount_cb(cdev, load_azure_ds_dir,
@@ -472,12 +507,19 @@ class DataSourceAzure(sources.DataSource):
continue
perform_reprovision = reprovision or self._should_reprovision(ret)
- if perform_reprovision:
+ perform_reprovision_after_nic_attach = (
+ reprovision_after_nic_attach or
+ self._should_reprovision_after_nic_attach(ret))
+
+ if perform_reprovision or perform_reprovision_after_nic_attach:
if util.is_FreeBSD():
msg = "Free BSD is not supported for PPS VMs"
report_diagnostic_event(msg, logger_func=LOG.error)
raise sources.InvalidMetaDataException(msg)
+ if perform_reprovision_after_nic_attach:
+ self._wait_for_all_nics_ready()
ret = self._reprovision()
+
imds_md = get_metadata_from_imds(
self.fallback_interface, retries=10)
(md, userdata_raw, cfg, files) = ret
@@ -508,7 +550,7 @@ class DataSourceAzure(sources.DataSource):
crawled_data['metadata']['random_seed'] = seed
crawled_data['metadata']['instance-id'] = self._iid()
- if perform_reprovision:
+ if perform_reprovision or perform_reprovision_after_nic_attach:
LOG.info("Reporting ready to Azure after getting ReprovisionData")
use_cached_ephemeral = (
self.distro.networking.is_up(self.fallback_interface) and
@@ -617,14 +659,12 @@ class DataSourceAzure(sources.DataSource):
LOG.debug('Retrieved SSH keys from IMDS')
except KeyError:
log_msg = 'Unable to get keys from IMDS, falling back to OVF'
- LOG.debug(log_msg)
report_diagnostic_event(log_msg, logger_func=LOG.debug)
try:
ssh_keys = self.metadata['public-keys']
LOG.debug('Retrieved keys from OVF')
except KeyError:
log_msg = 'No keys available from OVF'
- LOG.debug(log_msg)
report_diagnostic_event(log_msg, logger_func=LOG.debug)
return ssh_keys
@@ -660,10 +700,293 @@ class DataSourceAzure(sources.DataSource):
LOG.debug("negotiating already done for %s",
self.get_instance_id())
+ @azure_ds_telemetry_reporter
+ def _wait_for_nic_detach(self, nl_sock):
+ """Use the netlink socket provided to wait for nic detach event.
+ NOTE: The function doesn't close the socket. The caller owns closing
+ the socket and disposing it safely.
+ """
+ try:
+ ifname = None
+
+ # Preprovisioned VM will only have one NIC, and it gets
+ # detached immediately after deployment.
+ with events.ReportEventStack(
+ name="wait-for-nic-detach",
+ description=("wait for nic detach"),
+ parent=azure_ds_reporter):
+ ifname = netlink.wait_for_nic_detach_event(nl_sock)
+ if ifname is None:
+ msg = ("Preprovisioned nic not detached as expected. "
+ "Proceeding without failing.")
+ report_diagnostic_event(msg, logger_func=LOG.warning)
+ else:
+ report_diagnostic_event("The preprovisioned nic %s is detached"
+ % ifname, logger_func=LOG.warning)
+ path = REPROVISION_NIC_DETACHED_MARKER_FILE
+ LOG.info("Creating a marker file for nic detached: %s", path)
+ util.write_file(path, "{pid}: {time}\n".format(
+ pid=os.getpid(), time=time()))
+ except AssertionError as error:
+ report_diagnostic_event(error, logger_func=LOG.error)
+ raise
+
+ @azure_ds_telemetry_reporter
+ def wait_for_link_up(self, ifname):
+ """In cases where the link state is still showing down after a nic is
+ hot-attached, we can attempt to bring it up by forcing the hv_netvsc
+ drivers to query the link state by unbinding and then binding the
+ device. This function attempts infinitely until the link is up,
+ because we cannot proceed further until we have a stable link."""
+
+ if self.distro.networking.try_set_link_up(ifname):
+ report_diagnostic_event("The link %s is already up." % ifname,
+ logger_func=LOG.info)
+ return
+
+ LOG.info("Attempting to bring %s up", ifname)
+
+ attempts = 0
+ while True:
+
+ LOG.info("Unbinding and binding the interface %s", ifname)
+ devicename = net.read_sys_net(ifname,
+ 'device/device_id').strip('{}')
+ util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/unbind',
+ devicename)
+ util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/bind',
+ devicename)
+
+ attempts = attempts + 1
+ if self.distro.networking.try_set_link_up(ifname):
+ msg = "The link %s is up after %s attempts" % (ifname,
+ attempts)
+ report_diagnostic_event(msg, logger_func=LOG.info)
+ return
+
+ sleep_duration = 1
+ msg = ("Link is not up after %d attempts with %d seconds sleep "
+ "between attempts." % (attempts, sleep_duration))
+
+ if attempts % 10 == 0:
+ report_diagnostic_event(msg, logger_func=LOG.info)
+ else:
+ LOG.info(msg)
+
+ sleep(sleep_duration)
+
+ @azure_ds_telemetry_reporter
+ def _create_report_ready_marker(self):
+ path = REPORTED_READY_MARKER_FILE
+ LOG.info(
+ "Creating a marker file to report ready: %s", path)
+ util.write_file(path, "{pid}: {time}\n".format(
+ pid=os.getpid(), time=time()))
+ report_diagnostic_event(
+ 'Successfully created reported ready marker file '
+ 'while in the preprovisioning pool.',
+ logger_func=LOG.debug)
+
+ @azure_ds_telemetry_reporter
+ def _report_ready_if_needed(self):
+ """Report ready to the platform if the marker file is not present,
+ and create the marker file.
+ """
+ have_not_reported_ready = (
+ not os.path.isfile(REPORTED_READY_MARKER_FILE))
+
+ if have_not_reported_ready:
+ report_diagnostic_event("Reporting ready before nic detach",
+ logger_func=LOG.info)
+ try:
+ with EphemeralDHCPv4WithReporting(azure_ds_reporter) as lease:
+ self._report_ready(lease=lease)
+ except Exception as e:
+ report_diagnostic_event("Exception reporting ready during "
+ "preprovisioning before nic detach: %s"
+ % e, logger_func=LOG.error)
+ raise
+ self._create_report_ready_marker()
+ else:
+ report_diagnostic_event("Already reported ready before nic detach."
+ " The marker file already exists: %s" %
+ REPORTED_READY_MARKER_FILE,
+ logger_func=LOG.error)
+
+ @azure_ds_telemetry_reporter
+ def _check_if_nic_is_primary(self, ifname):
+ """Check if a given interface is the primary nic or not. If it is the
+ primary nic, then we also get the expected total nic count from IMDS.
+ IMDS will process the request and send a response only for primary NIC.
+ """
+ is_primary = False
+ expected_nic_count = -1
+ imds_md = None
+
+ # For now, only a VM's primary NIC can contact IMDS and WireServer. If
+ # DHCP fails for a NIC, we have no mechanism to determine if the NIC is
+ # primary or secondary. In this case, the desired behavior is to fail
+ # VM provisioning if there is any DHCP failure when trying to determine
+ # the primary NIC.
+ try:
+ with events.ReportEventStack(
+ name="obtain-dhcp-lease",
+ description=("obtain dhcp lease for %s when attempting to "
+ "determine primary NIC during reprovision of "
+ "a pre-provisioned VM" % ifname),
+ parent=azure_ds_reporter):
+ dhcp_ctx = EphemeralDHCPv4(
+ iface=ifname,
+ dhcp_log_func=dhcp_log_cb)
+ dhcp_ctx.obtain_lease()
+ except Exception as e:
+ report_diagnostic_event("Giving up. Failed to obtain dhcp lease "
+ "for %s when attempting to determine "
+ "primary NIC during reprovision due to %s"
+ % (ifname, e), logger_func=LOG.error)
+ raise
+
+ # Primary nic detection will be optimized in the future. The fact that
+ # primary nic is being attached first helps here. Otherwise each nic
+ # could add several seconds of delay.
+ try:
+ imds_md = get_metadata_from_imds(
+ ifname,
+ 5,
+ metadata_type.network)
+ except Exception as e:
+ LOG.warning(
+ "Failed to get network metadata using nic %s. Attempt to "
+ "contact IMDS failed with error %s. Assuming this is not the "
+ "primary nic.", ifname, e)
+ finally:
+ # If we are not the primary nic, then clean the dhcp context.
+ if imds_md is None:
+ dhcp_ctx.clean_network()
+
+ if imds_md is not None:
+ # Only primary NIC will get a response from IMDS.
+ LOG.info("%s is the primary nic", ifname)
+ is_primary = True
+
+ # If primary, set ephemeral dhcp ctx so we can report ready
+ self._ephemeral_dhcp_ctx = dhcp_ctx
+
+ # Set the expected nic count based on the response received.
+ expected_nic_count = len(
+ imds_md['interface'])
+ report_diagnostic_event("Expected nic count: %d" %
+ expected_nic_count, logger_func=LOG.info)
+
+ return is_primary, expected_nic_count
+
+ @azure_ds_telemetry_reporter
+ def _wait_for_hot_attached_nics(self, nl_sock):
+ """Wait until all the expected nics for the vm are hot-attached.
+ The expected nic count is obtained by requesting the network metadata
+ from IMDS.
+ """
+ LOG.info("Waiting for nics to be hot-attached")
+ try:
+ # Wait for nics to be attached one at a time, until we know for
+ # sure that all nics have been attached.
+ nics_found = []
+ primary_nic_found = False
+ expected_nic_count = -1
+
+ # Wait for netlink nic attach events. After the first nic is
+ # attached, we are already in the customer vm deployment path and
+ # so eerything from then on should happen fast and avoid
+ # unnecessary delays wherever possible.
+ while True:
+ ifname = None
+ with events.ReportEventStack(
+ name="wait-for-nic-attach",
+ description=("wait for nic attach after %d nics have "
+ "been attached" % len(nics_found)),
+ parent=azure_ds_reporter):
+ ifname = netlink.wait_for_nic_attach_event(nl_sock,
+ nics_found)
+
+ # wait_for_nic_attach_event guarantees that ifname it not None
+ nics_found.append(ifname)
+ report_diagnostic_event("Detected nic %s attached." % ifname,
+ logger_func=LOG.info)
+
+ # Attempt to bring the interface's operating state to
+ # UP in case it is not already.
+ self.wait_for_link_up(ifname)
+
+ # If primary nic is not found, check if this is it. The
+ # platform will attach the primary nic first so we
+ # won't be in primary_nic_found = false state for long.
+ if not primary_nic_found:
+ LOG.info("Checking if %s is the primary nic",
+ ifname)
+ (primary_nic_found, expected_nic_count) = (
+ self._check_if_nic_is_primary(ifname))
+
+ # Exit criteria: check if we've discovered all nics
+ if (expected_nic_count != -1
+ and len(nics_found) >= expected_nic_count):
+ LOG.info("Found all the nics for this VM.")
+ break
+
+ except AssertionError as error:
+ report_diagnostic_event(error, logger_func=LOG.error)
+
+ @azure_ds_telemetry_reporter
+ def _wait_for_all_nics_ready(self):
+ """Wait for nic(s) to be hot-attached. There may be multiple nics
+ depending on the customer request.
+ But only primary nic would be able to communicate with wireserver
+ and IMDS. So we detect and save the primary nic to be used later.
+ """
+
+ nl_sock = None
+ try:
+ nl_sock = netlink.create_bound_netlink_socket()
+
+ report_ready_marker_present = bool(
+ os.path.isfile(REPORTED_READY_MARKER_FILE))
+
+ # Report ready if the marker file is not already present.
+ # The nic of the preprovisioned vm gets hot-detached as soon as
+ # we report ready. So no need to save the dhcp context.
+ self._report_ready_if_needed()
+
+ has_nic_been_detached = bool(
+ os.path.isfile(REPROVISION_NIC_DETACHED_MARKER_FILE))
+
+ if not has_nic_been_detached:
+ LOG.info("NIC has not been detached yet.")
+ self._wait_for_nic_detach(nl_sock)
+
+ # If we know that the preprovisioned nic has been detached, and we
+ # still have a fallback nic, then it means the VM must have
+ # rebooted as part of customer assignment, and all the nics have
+ # already been attached by the Azure platform. So there is no need
+ # to wait for nics to be hot-attached.
+ if not self.fallback_interface:
+ self._wait_for_hot_attached_nics(nl_sock)
+ else:
+ report_diagnostic_event("Skipping waiting for nic attach "
+ "because we already have a fallback "
+ "interface. Report Ready marker "
+ "present before detaching nics: %s" %
+ report_ready_marker_present,
+ logger_func=LOG.info)
+ except netlink.NetlinkCreateSocketError as e:
+ report_diagnostic_event(e, logger_func=LOG.warning)
+ raise
+ finally:
+ if nl_sock:
+ nl_sock.close()
+
def _poll_imds(self):
"""Poll IMDS for the new provisioning data until we get a valid
response. Then return the returned JSON object."""
- url = IMDS_URL + "reprovisiondata?api-version=2017-04-02"
+ url = metadata_type.reprovisiondata.value
headers = {"Metadata": "true"}
nl_sock = None
report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE))
@@ -705,17 +1028,31 @@ class DataSourceAzure(sources.DataSource):
logger_func=LOG.warning)
return False
- LOG.debug("Wait for vnetswitch to happen")
+ # When the interface is hot-attached, we would have already
+ # done dhcp and set the dhcp context. In that case, skip
+ # the attempt to do dhcp.
+ is_ephemeral_ctx_present = self._ephemeral_dhcp_ctx is not None
+ msg = ("Unexpected error. Dhcp context is not expected to be already "
+ "set when we need to wait for vnet switch")
+ if is_ephemeral_ctx_present and report_ready:
+ report_diagnostic_event(msg, logger_func=LOG.error)
+ raise RuntimeError(msg)
+
while True:
try:
- # Save our EphemeralDHCPv4 context to avoid repeated dhcp
- with events.ReportEventStack(
- name="obtain-dhcp-lease",
- description="obtain dhcp lease",
- parent=azure_ds_reporter):
- self._ephemeral_dhcp_ctx = EphemeralDHCPv4(
- dhcp_log_func=dhcp_log_cb)
- lease = self._ephemeral_dhcp_ctx.obtain_lease()
+ # Since is_ephemeral_ctx_present is set only once, this ensures
+ # that with regular reprovisioning, dhcp is always done every
+ # time the loop runs.
+ if not is_ephemeral_ctx_present:
+ # Save our EphemeralDHCPv4 context to avoid repeated dhcp
+ # later when we report ready
+ with events.ReportEventStack(
+ name="obtain-dhcp-lease",
+ description="obtain dhcp lease",
+ parent=azure_ds_reporter):
+ self._ephemeral_dhcp_ctx = EphemeralDHCPv4(
+ dhcp_log_func=dhcp_log_cb)
+ lease = self._ephemeral_dhcp_ctx.obtain_lease()
if vnet_switched:
dhcp_attempts += 1
@@ -737,17 +1074,10 @@ class DataSourceAzure(sources.DataSource):
self._ephemeral_dhcp_ctx.clean_network()
raise sources.InvalidMetaDataException(msg)
- path = REPORTED_READY_MARKER_FILE
- LOG.info(
- "Creating a marker file to report ready: %s", path)
- util.write_file(path, "{pid}: {time}\n".format(
- pid=os.getpid(), time=time()))
- report_diagnostic_event(
- 'Successfully created reported ready marker file '
- 'while in the preprovisioning pool.',
- logger_func=LOG.debug)
+ self._create_report_ready_marker()
report_ready = False
+ LOG.debug("Wait for vnetswitch to happen")
with events.ReportEventStack(
name="wait-for-media-disconnect-connect",
description="wait for vnet switch",
@@ -863,6 +1193,35 @@ class DataSourceAzure(sources.DataSource):
"connectivity issues: %s" % e, logger_func=LOG.warning)
return False
+ def _should_reprovision_after_nic_attach(self, candidate_metadata) -> bool:
+ """Whether or not we should wait for nic attach and then poll
+ IMDS for reprovisioning data. Also sets a marker file to poll IMDS.
+
+ The marker file is used for the following scenario: the VM boots into
+ wait for nic attach, which we expect to be proceeding infinitely until
+ the nic is attached. If for whatever reason the platform moves us to a
+ new host (for instance a hardware issue), we need to keep waiting.
+ However, since the VM reports ready to the Fabric, we will not attach
+ the ISO, thus cloud-init needs to have a way of knowing that it should
+ jump back into the waiting mode in order to retrieve the ovf_env.
+
+ @param candidate_metadata: Metadata obtained from reading ovf-env.
+ @return: Whether to reprovision after waiting for nics to be attached.
+ """
+ if not candidate_metadata:
+ return False
+ (_md, _userdata_raw, cfg, _files) = candidate_metadata
+ path = REPROVISION_NIC_ATTACH_MARKER_FILE
+ if (cfg.get('PreprovisionedVMType', None) == "Savable" or
+ os.path.isfile(path)):
+ if not os.path.isfile(path):
+ LOG.info("Creating a marker file to wait for nic attach: %s",
+ path)
+ util.write_file(path, "{pid}: {time}\n".format(
+ pid=os.getpid(), time=time()))
+ return True
+ return False
+
def _should_reprovision(self, ret):
"""Whether or not we should poll IMDS for reprovisioning data.
Also sets a marker file to poll IMDS.
@@ -879,6 +1238,7 @@ class DataSourceAzure(sources.DataSource):
(_md, _userdata_raw, cfg, _files) = ret
path = REPROVISION_MARKER_FILE
if (cfg.get('PreprovisionedVm') is True or
+ cfg.get('PreprovisionedVMType', None) == 'Running' or
os.path.isfile(path)):
if not os.path.isfile(path):
LOG.info("Creating a marker file to poll imds: %s",
@@ -944,6 +1304,8 @@ class DataSourceAzure(sources.DataSource):
util.del_file(REPORTED_READY_MARKER_FILE)
util.del_file(REPROVISION_MARKER_FILE)
+ util.del_file(REPROVISION_NIC_ATTACH_MARKER_FILE)
+ util.del_file(REPROVISION_NIC_DETACHED_MARKER_FILE)
return fabric_data
@azure_ds_telemetry_reporter
@@ -1399,34 +1761,109 @@ def read_azure_ovf(contents):
if 'ssh_pwauth' not in cfg and password:
cfg['ssh_pwauth'] = True
- cfg['PreprovisionedVm'] = _extract_preprovisioned_vm_setting(dom)
+ preprovisioning_cfg = _get_preprovisioning_cfgs(dom)
+ cfg = util.mergemanydict([cfg, preprovisioning_cfg])
return (md, ud, cfg)
@azure_ds_telemetry_reporter
-def _extract_preprovisioned_vm_setting(dom):
- """Read the preprovision flag from the ovf. It should not
- exist unless true."""
+def _get_preprovisioning_cfgs(dom):
+ """Read the preprovisioning related flags from ovf and populates a dict
+ with the info.
+
+ Two flags are in use today: PreprovisionedVm bool and
+ PreprovisionedVMType enum. In the long term, the PreprovisionedVm bool
+ will be deprecated in favor of PreprovisionedVMType string/enum.
+
+ Only these combinations of values are possible today:
+ - PreprovisionedVm=True and PreprovisionedVMType=Running
+ - PreprovisionedVm=False and PreprovisionedVMType=Savable
+ - PreprovisionedVm is missing and PreprovisionedVMType=Running/Savable
+ - PreprovisionedVm=False and PreprovisionedVMType is missing
+
+ More specifically, this will never happen:
+ - PreprovisionedVm=True and PreprovisionedVMType=Savable
+ """
+ cfg = {
+ "PreprovisionedVm": False,
+ "PreprovisionedVMType": None
+ }
+
platform_settings_section = find_child(
dom.documentElement,
lambda n: n.localName == "PlatformSettingsSection")
if not platform_settings_section or len(platform_settings_section) == 0:
LOG.debug("PlatformSettingsSection not found")
- return False
+ return cfg
platform_settings = find_child(
platform_settings_section[0],
lambda n: n.localName == "PlatformSettings")
if not platform_settings or len(platform_settings) == 0:
LOG.debug("PlatformSettings not found")
- return False
- preprovisionedVm = find_child(
+ return cfg
+
+ # Read the PreprovisionedVm bool flag. This should be deprecated when the
+ # platform has removed PreprovisionedVm and only surfaces
+ # PreprovisionedVMType.
+ cfg["PreprovisionedVm"] = _get_preprovisionedvm_cfg_value(
+ platform_settings)
+
+ cfg["PreprovisionedVMType"] = _get_preprovisionedvmtype_cfg_value(
+ platform_settings)
+ return cfg
+
+
+@azure_ds_telemetry_reporter
+def _get_preprovisionedvm_cfg_value(platform_settings):
+ preprovisionedVm = False
+
+ # Read the PreprovisionedVm bool flag. This should be deprecated when the
+ # platform has removed PreprovisionedVm and only surfaces
+ # PreprovisionedVMType.
+ preprovisionedVmVal = find_child(
platform_settings[0],
lambda n: n.localName == "PreprovisionedVm")
- if not preprovisionedVm or len(preprovisionedVm) == 0:
+ if not preprovisionedVmVal or len(preprovisionedVmVal) == 0:
LOG.debug("PreprovisionedVm not found")
- return False
- return util.translate_bool(preprovisionedVm[0].firstChild.nodeValue)
+ return preprovisionedVm
+ preprovisionedVm = util.translate_bool(
+ preprovisionedVmVal[0].firstChild.nodeValue)
+
+ report_diagnostic_event(
+ "PreprovisionedVm: %s" % preprovisionedVm, logger_func=LOG.info)
+
+ return preprovisionedVm
+
+
+@azure_ds_telemetry_reporter
+def _get_preprovisionedvmtype_cfg_value(platform_settings):
+ preprovisionedVMType = None
+
+ # Read the PreprovisionedVMType value from the ovf. It can be
+ # 'Running' or 'Savable' or not exist. This enum value is intended to
+ # replace PreprovisionedVm bool flag in the long term.
+ # A Running VM is the same as preprovisioned VMs of today. This is
+ # equivalent to having PreprovisionedVm=True.
+ # A Savable VM is one whose nic is hot-detached immediately after it
+ # reports ready the first time to free up the network resources.
+ # Once assigned to customer, the customer-requested nics are
+ # hot-attached to it and reprovision happens like today.
+ preprovisionedVMTypeVal = find_child(
+ platform_settings[0],
+ lambda n: n.localName == "PreprovisionedVMType")
+ if (not preprovisionedVMTypeVal or len(preprovisionedVMTypeVal) == 0 or
+ preprovisionedVMTypeVal[0].firstChild is None):
+ LOG.debug("PreprovisionedVMType not found")
+ return preprovisionedVMType
+
+ preprovisionedVMType = preprovisionedVMTypeVal[0].firstChild.nodeValue
+
+ report_diagnostic_event(
+ "PreprovisionedVMType: %s" % preprovisionedVMType,
+ logger_func=LOG.info)
+
+ return preprovisionedVMType
def encrypt_pass(password, salt_id="$6$"):
@@ -1588,8 +2025,10 @@ def _generate_network_config_from_fallback_config() -> dict:
@azure_ds_telemetry_reporter
-def get_metadata_from_imds(fallback_nic, retries):
- """Query Azure's network metadata service, returning a dictionary.
+def get_metadata_from_imds(fallback_nic,
+ retries,
+ md_type=metadata_type.compute):
+ """Query Azure's instance metadata service, returning a dictionary.
If network is not up, setup ephemeral dhcp on fallback_nic to talk to the
IMDS. For more info on IMDS:
@@ -1604,7 +2043,7 @@ def get_metadata_from_imds(fallback_nic, retries):
"""
kwargs = {'logfunc': LOG.debug,
'msg': 'Crawl of Azure Instance Metadata Service (IMDS)',
- 'func': _get_metadata_from_imds, 'args': (retries,)}
+ 'func': _get_metadata_from_imds, 'args': (retries, md_type,)}
if net.is_up(fallback_nic):
return util.log_time(**kwargs)
else:
@@ -1620,9 +2059,9 @@ def get_metadata_from_imds(fallback_nic, retries):
@azure_ds_telemetry_reporter
-def _get_metadata_from_imds(retries):
+def _get_metadata_from_imds(retries, md_type=metadata_type.compute):
- url = IMDS_URL + "instance?api-version=2019-06-01"
+ url = md_type.value
headers = {"Metadata": "true"}
try:
response = readurl(
diff --git a/cloudinit/sources/helpers/netlink.py b/cloudinit/sources/helpers/netlink.py
index c2ad587b..e13d6834 100644
--- a/cloudinit/sources/helpers/netlink.py
+++ b/cloudinit/sources/helpers/netlink.py
@@ -185,6 +185,54 @@ def read_rta_oper_state(data):
return InterfaceOperstate(ifname, operstate)
+def wait_for_nic_attach_event(netlink_socket, existing_nics):
+ '''Block until a single nic is attached.
+
+ :param: netlink_socket: netlink_socket to receive events
+ :param: existing_nics: List of existing nics so that we can skip them.
+ :raises: AssertionError if netlink_socket is none.
+ '''
+ LOG.debug("Preparing to wait for nic attach.")
+ ifname = None
+
+ def should_continue_cb(iname, carrier, prevCarrier):
+ if iname in existing_nics:
+ return True
+ nonlocal ifname
+ ifname = iname
+ return False
+
+ # We can return even if the operational state of the new nic is DOWN
+ # because we set it to UP before doing dhcp.
+ read_netlink_messages(netlink_socket,
+ None,
+ [RTM_NEWLINK],
+ [OPER_UP, OPER_DOWN],
+ should_continue_cb)
+ return ifname
+
+
+def wait_for_nic_detach_event(netlink_socket):
+ '''Block until a single nic is detached and its operational state is down.
+
+ :param: netlink_socket: netlink_socket to receive events.
+ '''
+ LOG.debug("Preparing to wait for nic detach.")
+ ifname = None
+
+ def should_continue_cb(iname, carrier, prevCarrier):
+ nonlocal ifname
+ ifname = iname
+ return False
+
+ read_netlink_messages(netlink_socket,
+ None,
+ [RTM_DELLINK],
+ [OPER_DOWN],
+ should_continue_cb)
+ return ifname
+
+
def wait_for_media_disconnect_connect(netlink_socket, ifname):
'''Block until media disconnect and connect has happened on an interface.
Listens on netlink socket to receive netlink events and when the carrier
@@ -198,10 +246,42 @@ def wait_for_media_disconnect_connect(netlink_socket, ifname):
assert (netlink_socket is not None), ("netlink socket is none")
assert (ifname is not None), ("interface name is none")
assert (len(ifname) > 0), ("interface name cannot be empty")
+
+ def should_continue_cb(iname, carrier, prevCarrier):
+ # check for carrier down, up sequence
+ isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP)
+ if isVnetSwitch:
+ LOG.debug("Media switch happened on %s.", ifname)
+ return False
+ return True
+
+ LOG.debug("Wait for media disconnect and reconnect to happen")
+ read_netlink_messages(netlink_socket,
+ ifname,
+ [RTM_NEWLINK, RTM_DELLINK],
+ [OPER_UP, OPER_DOWN],
+ should_continue_cb)
+
+
+def read_netlink_messages(netlink_socket,
+ ifname_filter,
+ rtm_types,
+ operstates,
+ should_continue_callback):
+ ''' Reads from the netlink socket until the condition specified by
+ the continuation callback is met.
+
+ :param: netlink_socket: netlink_socket to receive events.
+ :param: ifname_filter: if not None, will only listen for this interface.
+ :param: rtm_types: Type of netlink events to listen for.
+ :param: operstates: Operational states to listen.
+ :param: should_continue_callback: Specifies when to stop listening.
+ '''
+ if netlink_socket is None:
+ raise RuntimeError("Netlink socket is none")
+ data = bytes()
carrier = OPER_UP
prevCarrier = OPER_UP
- data = bytes()
- LOG.debug("Wait for media disconnect and reconnect to happen")
while True:
recv_data = read_netlink_socket(netlink_socket, SELECT_TIMEOUT)
if recv_data is None:
@@ -223,26 +303,26 @@ def wait_for_media_disconnect_connect(netlink_socket, ifname):
padlen = (nlheader.length+PAD_ALIGNMENT-1) & ~(PAD_ALIGNMENT-1)
offset = offset + padlen
LOG.debug('offset to next netlink message: %d', offset)
- # Ignore any messages not new link or del link
- if nlheader.type not in [RTM_NEWLINK, RTM_DELLINK]:
+ # Continue if we are not interested in this message.
+ if nlheader.type not in rtm_types:
continue
interface_state = read_rta_oper_state(nl_msg)
if interface_state is None:
LOG.debug('Failed to read rta attributes: %s', interface_state)
continue
- if interface_state.ifname != ifname:
+ if (ifname_filter is not None and
+ interface_state.ifname != ifname_filter):
LOG.debug(
"Ignored netlink event on interface %s. Waiting for %s.",
- interface_state.ifname, ifname)
+ interface_state.ifname, ifname_filter)
continue
- if interface_state.operstate not in [OPER_UP, OPER_DOWN]:
+ if interface_state.operstate not in operstates:
continue
prevCarrier = carrier
carrier = interface_state.operstate
- # check for carrier down, up sequence
- isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP)
- if isVnetSwitch:
- LOG.debug("Media switch happened on %s.", ifname)
+ if not should_continue_callback(interface_state.ifname,
+ carrier,
+ prevCarrier):
return
data = data[offset:]
diff --git a/cloudinit/sources/helpers/tests/test_netlink.py b/cloudinit/sources/helpers/tests/test_netlink.py
index 10760bd6..cafe3961 100644
--- a/cloudinit/sources/helpers/tests/test_netlink.py
+++ b/cloudinit/sources/helpers/tests/test_netlink.py
@@ -9,9 +9,10 @@ import codecs
from cloudinit.sources.helpers.netlink import (
NetlinkCreateSocketError, create_bound_netlink_socket, read_netlink_socket,
read_rta_oper_state, unpack_rta_attr, wait_for_media_disconnect_connect,
+ wait_for_nic_attach_event, wait_for_nic_detach_event,
OPER_DOWN, OPER_UP, OPER_DORMANT, OPER_LOWERLAYERDOWN, OPER_NOTPRESENT,
- OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_SETLINK,
- RTM_GETLINK, MAX_SIZE)
+ OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_DELLINK,
+ RTM_SETLINK, RTM_GETLINK, MAX_SIZE)
def int_to_bytes(i):
@@ -135,6 +136,75 @@ class TestParseNetlinkMessage(CiTestCase):
@mock.patch('cloudinit.sources.helpers.netlink.socket.socket')
@mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket')
+class TestNicAttachDetach(CiTestCase):
+ with_logs = True
+
+ def _media_switch_data(self, ifname, msg_type, operstate):
+ '''construct netlink data with specified fields'''
+ if ifname and operstate is not None:
+ data = bytearray(48)
+ bytes = ifname.encode("utf-8")
+ struct.pack_into("HH4sHHc", data, RTATTR_START_OFFSET, 8, 3,
+ bytes, 5, 16, int_to_bytes(operstate))
+ elif ifname:
+ data = bytearray(40)
+ bytes = ifname.encode("utf-8")
+ struct.pack_into("HH4s", data, RTATTR_START_OFFSET, 8, 3, bytes)
+ elif operstate:
+ data = bytearray(40)
+ struct.pack_into("HHc", data, RTATTR_START_OFFSET, 5, 16,
+ int_to_bytes(operstate))
+ struct.pack_into("=LHHLL", data, 0, len(data), msg_type, 0, 0, 0)
+ return data
+
+ def test_nic_attached_oper_down(self, m_read_netlink_socket, m_socket):
+ '''Test for a new nic attached'''
+ ifname = "eth0"
+ data_op_down = self._media_switch_data(ifname, RTM_NEWLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_op_down]
+ ifread = wait_for_nic_attach_event(m_socket, [])
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual(ifname, ifread)
+
+ def test_nic_attached_oper_up(self, m_read_netlink_socket, m_socket):
+ '''Test for a new nic attached'''
+ ifname = "eth0"
+ data_op_up = self._media_switch_data(ifname, RTM_NEWLINK, OPER_UP)
+ m_read_netlink_socket.side_effect = [data_op_up]
+ ifread = wait_for_nic_attach_event(m_socket, [])
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual(ifname, ifread)
+
+ def test_nic_attach_ignore_existing(self, m_read_netlink_socket, m_socket):
+ '''Test that we read only the interfaces we are interested in.'''
+ data_eth0 = self._media_switch_data("eth0", RTM_NEWLINK, OPER_DOWN)
+ data_eth1 = self._media_switch_data("eth1", RTM_NEWLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_eth0, data_eth1]
+ ifread = wait_for_nic_attach_event(m_socket, ["eth0"])
+ self.assertEqual(m_read_netlink_socket.call_count, 2)
+ self.assertEqual("eth1", ifread)
+
+ def test_nic_attach_read_first(self, m_read_netlink_socket, m_socket):
+ '''Test that we read only the interfaces we are interested in.'''
+ data_eth0 = self._media_switch_data("eth0", RTM_NEWLINK, OPER_DOWN)
+ data_eth1 = self._media_switch_data("eth1", RTM_NEWLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_eth0, data_eth1]
+ ifread = wait_for_nic_attach_event(m_socket, ["eth1"])
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual("eth0", ifread)
+
+ def test_nic_detached(self, m_read_netlink_socket, m_socket):
+ '''Test for an existing nic detached'''
+ ifname = "eth0"
+ data_op_down = self._media_switch_data(ifname, RTM_DELLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_op_down]
+ ifread = wait_for_nic_detach_event(m_socket)
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual(ifname, ifread)
+
+
+@mock.patch('cloudinit.sources.helpers.netlink.socket.socket')
+@mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket')
class TestWaitForMediaDisconnectConnect(CiTestCase):
with_logs = True
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
index 534314aa..e363c1f9 100644
--- a/tests/unittests/test_datasource/test_azure.py
+++ b/tests/unittests/test_datasource/test_azure.py
@@ -11,6 +11,7 @@ from cloudinit.version import version_string as vs
from cloudinit.tests.helpers import (
HttprettyTestCase, CiTestCase, populate_dir, mock, wrap_and_call,
ExitStack, resourceLocation)
+from cloudinit.sources.helpers import netlink
import copy
import crypt
@@ -78,6 +79,8 @@ def construct_valid_ovf_env(data=None, pubkeys=None,
if platform_settings:
for k, v in platform_settings.items():
content += "<%s>%s</%s>\n" % (k, v, k)
+ if "PreprovisionedVMType" not in platform_settings:
+ content += """<PreprovisionedVMType i:nil="true" />"""
content += """</PlatformSettings></wa:PlatformSettingsSection>
</Environment>"""
@@ -156,6 +159,31 @@ SECONDARY_INTERFACE = {
}
}
+IMDS_NETWORK_METADATA = {
+ "interface": [
+ {
+ "macAddress": "000D3A047598",
+ "ipv6": {
+ "ipAddress": []
+ },
+ "ipv4": {
+ "subnet": [
+ {
+ "prefix": "24",
+ "address": "10.0.0.0"
+ }
+ ],
+ "ipAddress": [
+ {
+ "privateIpAddress": "10.0.0.4",
+ "publicIpAddress": "104.46.124.81"
+ }
+ ]
+ }
+ }
+ ]
+}
+
MOCKPATH = 'cloudinit.sources.DataSourceAzure.'
@@ -366,8 +394,8 @@ class TestGetMetadataFromIMDS(HttprettyTestCase):
self.network_md_url = dsaz.IMDS_URL + "instance?api-version=2019-06-01"
@mock.patch(MOCKPATH + 'readurl')
- @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
- @mock.patch(MOCKPATH + 'net.is_up')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4', autospec=True)
+ @mock.patch(MOCKPATH + 'net.is_up', autospec=True)
def test_get_metadata_does_not_dhcp_if_network_is_up(
self, m_net_is_up, m_dhcp, m_readurl):
"""Do not perform DHCP setup when nic is already up."""
@@ -384,9 +412,66 @@ class TestGetMetadataFromIMDS(HttprettyTestCase):
"Crawl of Azure Instance Metadata Service (IMDS) took", # log_time
self.logs.getvalue())
- @mock.patch(MOCKPATH + 'readurl')
- @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting')
+ @mock.patch(MOCKPATH + 'readurl', autospec=True)
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
+ @mock.patch(MOCKPATH + 'net.is_up')
+ def test_get_compute_metadata_uses_compute_url(
+ self, m_net_is_up, m_dhcp, m_readurl):
+ """Make sure readurl is called with the correct url when accessing
+ network metadata"""
+ m_net_is_up.return_value = True
+ m_readurl.return_value = url_helper.StringResponse(
+ json.dumps(IMDS_NETWORK_METADATA).encode('utf-8'))
+
+ dsaz.get_metadata_from_imds(
+ 'eth0', retries=3, md_type=dsaz.metadata_type.compute)
+ m_readurl.assert_called_with(
+ "http://169.254.169.254/metadata/instance?api-version="
+ "2019-06-01", exception_cb=mock.ANY,
+ headers=mock.ANY, retries=mock.ANY,
+ timeout=mock.ANY)
+
+ @mock.patch(MOCKPATH + 'readurl', autospec=True)
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
+ @mock.patch(MOCKPATH + 'net.is_up')
+ def test_get_network_metadata_uses_network_url(
+ self, m_net_is_up, m_dhcp, m_readurl):
+ """Make sure readurl is called with the correct url when accessing
+ network metadata"""
+ m_net_is_up.return_value = True
+ m_readurl.return_value = url_helper.StringResponse(
+ json.dumps(IMDS_NETWORK_METADATA).encode('utf-8'))
+
+ dsaz.get_metadata_from_imds(
+ 'eth0', retries=3, md_type=dsaz.metadata_type.network)
+ m_readurl.assert_called_with(
+ "http://169.254.169.254/metadata/instance/network?api-version="
+ "2019-06-01", exception_cb=mock.ANY,
+ headers=mock.ANY, retries=mock.ANY,
+ timeout=mock.ANY)
+
+ @mock.patch(MOCKPATH + 'readurl', autospec=True)
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
@mock.patch(MOCKPATH + 'net.is_up')
+ def test_get_default_metadata_uses_compute_url(
+ self, m_net_is_up, m_dhcp, m_readurl):
+ """Make sure readurl is called with the correct url when accessing
+ network metadata"""
+ m_net_is_up.return_value = True
+ m_readurl.return_value = url_helper.StringResponse(
+ json.dumps(IMDS_NETWORK_METADATA).encode('utf-8'))
+
+ dsaz.get_metadata_from_imds(
+ 'eth0', retries=3)
+ m_readurl.assert_called_with(
+ "http://169.254.169.254/metadata/instance?api-version="
+ "2019-06-01", exception_cb=mock.ANY,
+ headers=mock.ANY, retries=mock.ANY,
+ timeout=mock.ANY)
+
+ @mock.patch(MOCKPATH + 'readurl', autospec=True)
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting', autospec=True)
+ @mock.patch(MOCKPATH + 'net.is_up', autospec=True)
def test_get_metadata_performs_dhcp_when_network_is_down(
self, m_net_is_up, m_dhcp, m_readurl):
"""Perform DHCP setup when nic is not up."""
@@ -410,7 +495,7 @@ class TestGetMetadataFromIMDS(HttprettyTestCase):
timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS)
@mock.patch('cloudinit.url_helper.time.sleep')
- @mock.patch(MOCKPATH + 'net.is_up')
+ @mock.patch(MOCKPATH + 'net.is_up', autospec=True)
def test_get_metadata_from_imds_empty_when_no_imds_present(
self, m_net_is_up, m_sleep):
"""Return empty dict when IMDS network metadata is absent."""
@@ -431,7 +516,7 @@ class TestGetMetadataFromIMDS(HttprettyTestCase):
@mock.patch('requests.Session.request')
@mock.patch('cloudinit.url_helper.time.sleep')
- @mock.patch(MOCKPATH + 'net.is_up')
+ @mock.patch(MOCKPATH + 'net.is_up', autospec=True)
def test_get_metadata_from_imds_retries_on_timeout(
self, m_net_is_up, m_sleep, m_request):
"""Retry IMDS network metadata on timeout errors."""
@@ -801,6 +886,7 @@ scbus-1 on xpt0 bus 0
'sys_cfg': {}}
dsrc = self._get_ds(data)
expected_cfg = {
+ 'PreprovisionedVMType': None,
'PreprovisionedVm': False,
'datasource': {'Azure': {'agent_command': 'my_command'}},
'system_info': {'default_user': {'name': u'myuser'}}}
@@ -864,6 +950,66 @@ scbus-1 on xpt0 bus 0
dsrc.crawl_metadata()
self.assertEqual(1, m_report_ready.call_count)
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.EphemeralDHCPv4WithReporting')
+ @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file')
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready')
+ @mock.patch('cloudinit.sources.DataSourceAzure.DataSourceAzure._poll_imds')
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.DataSourceAzure.'
+ '_wait_for_all_nics_ready')
+ def test_crawl_metadata_waits_for_nic_on_savable_vms(
+ self, detect_nics, poll_imds_func, report_ready_func, m_write, m_dhcp
+ ):
+ """If reprovisioning, report ready at the end"""
+ ovfenv = construct_valid_ovf_env(
+ platform_settings={"PreprovisionedVMType": "Savable",
+ "PreprovisionedVm": "True"}
+ )
+
+ data = {
+ 'ovfcontent': ovfenv,
+ 'sys_cfg': {}
+ }
+ dsrc = self._get_ds(data)
+ poll_imds_func.return_value = ovfenv
+ dsrc.crawl_metadata()
+ self.assertEqual(1, report_ready_func.call_count)
+ self.assertEqual(1, detect_nics.call_count)
+
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.EphemeralDHCPv4WithReporting')
+ @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file')
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready')
+ @mock.patch('cloudinit.sources.DataSourceAzure.DataSourceAzure._poll_imds')
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.DataSourceAzure.'
+ '_wait_for_all_nics_ready')
+ @mock.patch('os.path.isfile')
+ def test_detect_nics_when_marker_present(
+ self, is_file, detect_nics, poll_imds_func, report_ready_func, m_write,
+ m_dhcp):
+ """If reprovisioning, wait for nic attach if marker present"""
+
+ def is_file_ret(key):
+ return key == dsaz.REPROVISION_NIC_ATTACH_MARKER_FILE
+
+ is_file.side_effect = is_file_ret
+ ovfenv = construct_valid_ovf_env()
+
+ data = {
+ 'ovfcontent': ovfenv,
+ 'sys_cfg': {}
+ }
+
+ dsrc = self._get_ds(data)
+ poll_imds_func.return_value = ovfenv
+ dsrc.crawl_metadata()
+ self.assertEqual(1, report_ready_func.call_count)
+ self.assertEqual(1, detect_nics.call_count)
+
@mock.patch('cloudinit.sources.DataSourceAzure.util.write_file')
@mock.patch('cloudinit.sources.helpers.netlink.'
'wait_for_media_disconnect_connect')
@@ -1526,7 +1672,7 @@ scbus-1 on xpt0 bus 0
@mock.patch('cloudinit.net.get_interface_mac')
@mock.patch('cloudinit.net.get_devicelist')
@mock.patch('cloudinit.net.device_driver')
- @mock.patch('cloudinit.net.generate_fallback_config')
+ @mock.patch('cloudinit.net.generate_fallback_config', autospec=True)
def test_fallback_network_config(self, mock_fallback, mock_dd,
mock_devlist, mock_get_mac):
"""On absent IMDS network data, generate network fallback config."""
@@ -1561,7 +1707,7 @@ scbus-1 on xpt0 bus 0
blacklist_drivers=['mlx4_core', 'mlx5_core'],
config_driver=True)
- @mock.patch(MOCKPATH + 'net.get_interfaces')
+ @mock.patch(MOCKPATH + 'net.get_interfaces', autospec=True)
@mock.patch(MOCKPATH + 'util.is_FreeBSD')
def test_blacklist_through_distro(
self, m_is_freebsd, m_net_get_interfaces):
@@ -1583,17 +1729,17 @@ scbus-1 on xpt0 bus 0
m_net_get_interfaces.assert_called_with(
blacklist_drivers=dsaz.BLACKLIST_DRIVERS)
- @mock.patch(MOCKPATH + 'subp.subp')
+ @mock.patch(MOCKPATH + 'subp.subp', autospec=True)
def test_get_hostname_with_no_args(self, m_subp):
dsaz.get_hostname()
m_subp.assert_called_once_with(("hostname",), capture=True)
- @mock.patch(MOCKPATH + 'subp.subp')
+ @mock.patch(MOCKPATH + 'subp.subp', autospec=True)
def test_get_hostname_with_string_arg(self, m_subp):
dsaz.get_hostname(hostname_command="hostname")
m_subp.assert_called_once_with(("hostname",), capture=True)
- @mock.patch(MOCKPATH + 'subp.subp')
+ @mock.patch(MOCKPATH + 'subp.subp', autospec=True)
def test_get_hostname_with_iterable_arg(self, m_subp):
dsaz.get_hostname(hostname_command=("hostname",))
m_subp.assert_called_once_with(("hostname",), capture=True)
@@ -2224,6 +2370,29 @@ class TestPreprovisioningReadAzureOvfFlag(CiTestCase):
ret = dsaz.read_azure_ovf(content)
cfg = ret[2]
self.assertFalse(cfg['PreprovisionedVm'])
+ self.assertEqual(None, cfg["PreprovisionedVMType"])
+
+ def test_read_azure_ovf_with_running_type(self):
+ """The read_azure_ovf method should set PreprovisionedVMType
+ cfg flag to Running."""
+ content = construct_valid_ovf_env(
+ platform_settings={"PreprovisionedVMType": "Running",
+ "PreprovisionedVm": "True"})
+ ret = dsaz.read_azure_ovf(content)
+ cfg = ret[2]
+ self.assertTrue(cfg['PreprovisionedVm'])
+ self.assertEqual("Running", cfg['PreprovisionedVMType'])
+
+ def test_read_azure_ovf_with_savable_type(self):
+ """The read_azure_ovf method should set PreprovisionedVMType
+ cfg flag to Savable."""
+ content = construct_valid_ovf_env(
+ platform_settings={"PreprovisionedVMType": "Savable",
+ "PreprovisionedVm": "True"})
+ ret = dsaz.read_azure_ovf(content)
+ cfg = ret[2]
+ self.assertTrue(cfg['PreprovisionedVm'])
+ self.assertEqual("Savable", cfg['PreprovisionedVMType'])
@mock.patch('os.path.isfile')
@@ -2273,6 +2442,227 @@ class TestPreprovisioningShouldReprovision(CiTestCase):
_poll_imds.assert_called_with()
+class TestPreprovisioningHotAttachNics(CiTestCase):
+
+ def setUp(self):
+ super(TestPreprovisioningHotAttachNics, self).setUp()
+ self.tmp = self.tmp_dir()
+ self.waagent_d = self.tmp_path('/var/lib/waagent', self.tmp)
+ self.paths = helpers.Paths({'cloud_dir': self.tmp})
+ dsaz.BUILTIN_DS_CONFIG['data_dir'] = self.waagent_d
+ self.paths = helpers.Paths({'cloud_dir': self.tmp})
+
+ @mock.patch('cloudinit.sources.helpers.netlink.wait_for_nic_detach_event',
+ autospec=True)
+ @mock.patch(MOCKPATH + 'util.write_file', autospec=True)
+ def test_nic_detach_writes_marker(self, m_writefile, m_detach):
+ """When we detect that a nic gets detached, we write a marker for it"""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+ nl_sock = mock.MagicMock()
+ dsa._wait_for_nic_detach(nl_sock)
+ m_detach.assert_called_with(nl_sock)
+ self.assertEqual(1, m_detach.call_count)
+ m_writefile.assert_called_with(
+ dsaz.REPROVISION_NIC_DETACHED_MARKER_FILE, mock.ANY)
+
+ @mock.patch(MOCKPATH + 'util.write_file', autospec=True)
+ @mock.patch(MOCKPATH + 'DataSourceAzure.fallback_interface')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._report_ready')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach')
+ def test_detect_nic_attach_reports_ready_and_waits_for_detach(
+ self, m_detach, m_report_ready, m_dhcp, m_fallback_if,
+ m_writefile):
+ """Report ready first and then wait for nic detach"""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+ dsa._wait_for_all_nics_ready()
+ m_fallback_if.return_value = "Dummy interface"
+ self.assertEqual(1, m_report_ready.call_count)
+ self.assertEqual(1, m_detach.call_count)
+ self.assertEqual(1, m_writefile.call_count)
+ self.assertEqual(1, m_dhcp.call_count)
+ m_writefile.assert_called_with(dsaz.REPORTED_READY_MARKER_FILE,
+ mock.ANY)
+
+ @mock.patch('os.path.isfile')
+ @mock.patch(MOCKPATH + 'DataSourceAzure.fallback_interface')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._report_ready')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach')
+ def test_detect_nic_attach_skips_report_ready_when_marker_present(
+ self, m_detach, m_report_ready, m_dhcp, m_fallback_if, m_isfile):
+ """Skip reporting ready if we already have a marker file."""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+
+ def isfile(key):
+ return key == dsaz.REPORTED_READY_MARKER_FILE
+
+ m_isfile.side_effect = isfile
+ dsa._wait_for_all_nics_ready()
+ m_fallback_if.return_value = "Dummy interface"
+ self.assertEqual(0, m_report_ready.call_count)
+ self.assertEqual(0, m_dhcp.call_count)
+ self.assertEqual(1, m_detach.call_count)
+
+ @mock.patch('os.path.isfile')
+ @mock.patch(MOCKPATH + 'DataSourceAzure.fallback_interface')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._report_ready')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach')
+ def test_detect_nic_attach_skips_nic_detach_when_marker_present(
+ self, m_detach, m_report_ready, m_dhcp, m_fallback_if, m_isfile):
+ """Skip wait for nic detach if it already happened."""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+
+ m_isfile.return_value = True
+ dsa._wait_for_all_nics_ready()
+ m_fallback_if.return_value = "Dummy interface"
+ self.assertEqual(0, m_report_ready.call_count)
+ self.assertEqual(0, m_dhcp.call_count)
+ self.assertEqual(0, m_detach.call_count)
+
+ @mock.patch(MOCKPATH + 'DataSourceAzure.wait_for_link_up', autospec=True)
+ @mock.patch('cloudinit.sources.helpers.netlink.wait_for_nic_attach_event')
+ @mock.patch('cloudinit.sources.net.find_fallback_nic')
+ @mock.patch(MOCKPATH + 'get_metadata_from_imds')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach')
+ @mock.patch('os.path.isfile')
+ def test_wait_for_nic_attach_if_no_fallback_interface(
+ self, m_isfile, m_detach, m_dhcpv4, m_imds, m_fallback_if,
+ m_attach, m_link_up):
+ """Wait for nic attach if we do not have a fallback interface"""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+ lease = {
+ 'interface': 'eth9', 'fixed-address': '192.168.2.9',
+ 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0',
+ 'unknown-245': '624c3620'}
+
+ m_isfile.return_value = True
+ m_attach.return_value = "eth0"
+ dhcp_ctx = mock.MagicMock(lease=lease)
+ dhcp_ctx.obtain_lease.return_value = lease
+ m_dhcpv4.return_value = dhcp_ctx
+ m_imds.return_value = IMDS_NETWORK_METADATA
+ m_fallback_if.return_value = None
+
+ dsa._wait_for_all_nics_ready()
+
+ self.assertEqual(0, m_detach.call_count)
+ self.assertEqual(1, m_attach.call_count)
+ self.assertEqual(1, m_dhcpv4.call_count)
+ self.assertEqual(1, m_imds.call_count)
+ self.assertEqual(1, m_link_up.call_count)
+ m_link_up.assert_called_with(mock.ANY, "eth0")
+
+ @mock.patch(MOCKPATH + 'DataSourceAzure.wait_for_link_up')
+ @mock.patch('cloudinit.sources.helpers.netlink.wait_for_nic_attach_event')
+ @mock.patch('cloudinit.sources.net.find_fallback_nic')
+ @mock.patch(MOCKPATH + 'get_metadata_from_imds')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach')
+ @mock.patch('os.path.isfile')
+ def test_wait_for_nic_attach_multinic_attach(
+ self, m_isfile, m_detach, m_dhcpv4, m_imds, m_fallback_if,
+ m_attach, m_link_up):
+ """Wait for nic attach if we do not have a fallback interface"""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+ lease = {
+ 'interface': 'eth9', 'fixed-address': '192.168.2.9',
+ 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0',
+ 'unknown-245': '624c3620'}
+ m_attach_call_count = 0
+
+ def nic_attach_ret(nl_sock, nics_found):
+ nonlocal m_attach_call_count
+ if m_attach_call_count == 0:
+ m_attach_call_count = m_attach_call_count + 1
+ return "eth0"
+ return "eth1"
+
+ def network_metadata_ret(ifname, retries, type):
+ # Simulate two NICs by adding the same one twice.
+ md = IMDS_NETWORK_METADATA
+ md['interface'].append(md['interface'][0])
+ if ifname == "eth0":
+ return md
+ raise requests.Timeout('Fake connection timeout')
+
+ m_isfile.return_value = True
+ m_attach.side_effect = nic_attach_ret
+ dhcp_ctx = mock.MagicMock(lease=lease)
+ dhcp_ctx.obtain_lease.return_value = lease
+ m_dhcpv4.return_value = dhcp_ctx
+ m_imds.side_effect = network_metadata_ret
+ m_fallback_if.return_value = None
+
+ dsa._wait_for_all_nics_ready()
+
+ self.assertEqual(0, m_detach.call_count)
+ self.assertEqual(2, m_attach.call_count)
+ # DHCP and network metadata calls will only happen on the primary NIC.
+ self.assertEqual(1, m_dhcpv4.call_count)
+ self.assertEqual(1, m_imds.call_count)
+ self.assertEqual(2, m_link_up.call_count)
+
+ @mock.patch('cloudinit.distros.networking.LinuxNetworking.try_set_link_up')
+ def test_wait_for_link_up_returns_if_already_up(
+ self, m_is_link_up):
+ """Waiting for link to be up should return immediately if the link is
+ already up."""
+
+ distro_cls = distros.fetch('ubuntu')
+ distro = distro_cls('ubuntu', {}, self.paths)
+ dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
+ m_is_link_up.return_value = True
+
+ dsa.wait_for_link_up("eth0")
+ self.assertEqual(1, m_is_link_up.call_count)
+
+ @mock.patch(MOCKPATH + 'util.write_file')
+ @mock.patch('cloudinit.net.read_sys_net')
+ @mock.patch('cloudinit.distros.networking.LinuxNetworking.try_set_link_up')
+ def test_wait_for_link_up_writes_to_device_file(
+ self, m_is_link_up, m_read_sys_net, m_writefile):
+ """Waiting for link to be up should return immediately if the link is
+ already up."""
+
+ distro_cls = distros.fetch('ubuntu')
+ distro = distro_cls('ubuntu', {}, self.paths)
+ dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
+
+ callcount = 0
+
+ def linkup(key):
+ nonlocal callcount
+ if callcount == 0:
+ callcount += 1
+ return False
+ return True
+
+ m_is_link_up.side_effect = linkup
+
+ dsa.wait_for_link_up("eth0")
+ self.assertEqual(2, m_is_link_up.call_count)
+ self.assertEqual(1, m_read_sys_net.call_count)
+ self.assertEqual(2, m_writefile.call_count)
+
+ @mock.patch('cloudinit.sources.helpers.netlink.'
+ 'create_bound_netlink_socket')
+ def test_wait_for_all_nics_ready_raises_if_socket_fails(self, m_socket):
+ """Waiting for all nics should raise exception if netlink socket
+ creation fails."""
+
+ m_socket.side_effect = netlink.NetlinkCreateSocketError
+ distro_cls = distros.fetch('ubuntu')
+ distro = distro_cls('ubuntu', {}, self.paths)
+ dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
+
+ self.assertRaises(netlink.NetlinkCreateSocketError,
+ dsa._wait_for_all_nics_ready)
+ # dsa._wait_for_all_nics_ready()
+
+
@mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network')
@mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery')
@mock.patch('cloudinit.sources.helpers.netlink.'
@@ -2330,6 +2720,24 @@ class TestPreprovisioningPollIMDS(CiTestCase):
self.assertEqual(3, m_dhcpv4.call_count, 'Expected 3 DHCP calls')
self.assertEqual(4, self.tries, 'Expected 4 total reads from IMDS')
+ @mock.patch('os.path.isfile')
+ def test_poll_imds_skips_dhcp_if_ctx_present(
+ self, m_isfile, report_ready_func, fake_resp, m_media_switch,
+ m_dhcp, m_net):
+ """The poll_imds function should reuse the dhcp ctx if it is already
+ present. This happens when we wait for nic to be hot-attached before
+ polling for reprovisiondata. Note that if this ctx is set when
+ _poll_imds is called, then it is not expected to be waiting for
+ media_disconnect_connect either."""
+ report_file = self.tmp_path('report_marker', self.tmp)
+ m_isfile.return_value = True
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+ dsa._ephemeral_dhcp_ctx = "Dummy dhcp ctx"
+ with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file):
+ dsa._poll_imds()
+ self.assertEqual(0, m_dhcp.call_count)
+ self.assertEqual(0, m_media_switch.call_count)
+
def test_does_not_poll_imds_report_ready_when_marker_file_exists(
self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net):
"""poll_imds should not call report ready when the reported ready
@@ -2390,7 +2798,7 @@ class TestPreprovisioningPollIMDS(CiTestCase):
@mock.patch(MOCKPATH + 'util.is_FreeBSD')
@mock.patch('cloudinit.sources.helpers.netlink.'
'wait_for_media_disconnect_connect')
-@mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network')
+@mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network', autospec=True)
@mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery')
@mock.patch('requests.Session.request')
class TestAzureDataSourcePreprovisioning(CiTestCase):
@@ -2412,7 +2820,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
m_dhcp.return_value = [{
'interface': 'eth9', 'fixed-address': '192.168.2.9',
'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0'}]
- url = 'http://{0}/metadata/reprovisiondata?api-version=2017-04-02'
+ url = 'http://{0}/metadata/reprovisiondata?api-version=2019-06-01'
host = "169.254.169.254"
full_url = url.format(host)
m_request.return_value = mock.MagicMock(status_code=200, text="ovf",
@@ -2445,7 +2853,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
'interface': 'eth9', 'fixed-address': '192.168.2.9',
'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0',
'unknown-245': '624c3620'}]
- url = 'http://{0}/metadata/reprovisiondata?api-version=2017-04-02'
+ url = 'http://{0}/metadata/reprovisiondata?api-version=2019-06-01'
host = "169.254.169.254"
full_url = url.format(host)
hostname = "myhost"
diff --git a/tools/.github-cla-signers b/tools/.github-cla-signers
index 9b594a44..1e0c3ea4 100644
--- a/tools/.github-cla-signers
+++ b/tools/.github-cla-signers
@@ -1,6 +1,7 @@
ader1990
AlexBaranowski
Aman306
+aswinrajamannar
beezly
bipinbachhao
BirknerAlex