summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cloudinit/distros/networking.py15
-rw-r--r--cloudinit/distros/tests/test_networking.py31
-rwxr-xr-xcloudinit/sources/DataSourceAzure.py515
-rw-r--r--cloudinit/sources/helpers/netlink.py102
-rw-r--r--cloudinit/sources/helpers/tests/test_netlink.py74
-rw-r--r--tests/unittests/test_datasource/test_azure.py436
-rw-r--r--tools/.github-cla-signers1
7 files changed, 1109 insertions, 65 deletions
diff --git a/cloudinit/distros/networking.py b/cloudinit/distros/networking.py
index e407fa29..c291196a 100644
--- a/cloudinit/distros/networking.py
+++ b/cloudinit/distros/networking.py
@@ -2,6 +2,7 @@ import abc
import logging
import os
+from cloudinit import subp
from cloudinit import net, util
@@ -175,6 +176,10 @@ class Networking(metaclass=abc.ABCMeta):
if strict:
raise RuntimeError(msg)
+ @abc.abstractmethod
+ def try_set_link_up(self, devname: DeviceName) -> bool:
+ """Try setting the link to up explicitly and return if it is up."""
+
class BSDNetworking(Networking):
"""Implementation of networking functionality shared across BSDs."""
@@ -185,6 +190,9 @@ class BSDNetworking(Networking):
def settle(self, *, exists=None) -> None:
"""BSD has no equivalent to `udevadm settle`; noop."""
+ def try_set_link_up(self, devname: DeviceName) -> bool:
+ raise NotImplementedError()
+
class LinuxNetworking(Networking):
"""Implementation of networking functionality common to Linux distros."""
@@ -214,3 +222,10 @@ class LinuxNetworking(Networking):
if exists is not None:
exists = net.sys_dev_path(exists)
util.udevadm_settle(exists=exists)
+
+ def try_set_link_up(self, devname: DeviceName) -> bool:
+ """Try setting the link to up explicitly and return if it is up.
+ Not guaranteed to bring the interface up. The caller is expected to
+ add wait times before retrying."""
+ subp.subp(['ip', 'link', 'set', devname, 'up'])
+ return self.is_up(devname)
diff --git a/cloudinit/distros/tests/test_networking.py b/cloudinit/distros/tests/test_networking.py
index b9a63842..ec508f4d 100644
--- a/cloudinit/distros/tests/test_networking.py
+++ b/cloudinit/distros/tests/test_networking.py
@@ -30,6 +30,9 @@ def generic_networking_cls():
def settle(self, *args, **kwargs):
raise NotImplementedError
+ def try_set_link_up(self, *args, **kwargs):
+ raise NotImplementedError
+
error = AssertionError("Unexpectedly used /sys in generic networking code")
with mock.patch(
"cloudinit.net.get_sys_class_path", side_effect=error,
@@ -74,6 +77,34 @@ class TestLinuxNetworkingIsPhysical:
assert LinuxNetworking().is_physical(devname)
+class TestBSDNetworkingTrySetLinkUp:
+ def test_raises_notimplementederror(self):
+ with pytest.raises(NotImplementedError):
+ BSDNetworking().try_set_link_up("eth0")
+
+
+@mock.patch("cloudinit.net.is_up")
+@mock.patch("cloudinit.distros.networking.subp.subp")
+class TestLinuxNetworkingTrySetLinkUp:
+ def test_calls_subp_return_true(self, m_subp, m_is_up):
+ devname = "eth0"
+ m_is_up.return_value = True
+ is_success = LinuxNetworking().try_set_link_up(devname)
+
+ assert (mock.call(['ip', 'link', 'set', devname, 'up']) ==
+ m_subp.call_args_list[-1])
+ assert is_success
+
+ def test_calls_subp_return_false(self, m_subp, m_is_up):
+ devname = "eth0"
+ m_is_up.return_value = False
+ is_success = LinuxNetworking().try_set_link_up(devname)
+
+ assert (mock.call(['ip', 'link', 'set', devname, 'up']) ==
+ m_subp.call_args_list[-1])
+ assert not is_success
+
+
class TestBSDNetworkingSettle:
def test_settle_doesnt_error(self):
# This also implicitly tests that it doesn't use subp.subp
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index f777a007..04ff2131 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -12,8 +12,10 @@ import os
import os.path
import re
from time import time
+from time import sleep
from xml.dom import minidom
import xml.etree.ElementTree as ET
+from enum import Enum
from cloudinit import dmi
from cloudinit import log as logging
@@ -67,13 +69,27 @@ DEFAULT_FS = 'ext4'
# DMI chassis-asset-tag is set static for all azure instances
AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77'
REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds"
+REPROVISION_NIC_ATTACH_MARKER_FILE = "/var/lib/cloud/data/wait_for_nic_attach"
+REPROVISION_NIC_DETACHED_MARKER_FILE = "/var/lib/cloud/data/nic_detached"
REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready"
AGENT_SEED_DIR = '/var/lib/waagent'
+
# In the event where the IMDS primary server is not
# available, it takes 1s to fallback to the secondary one
IMDS_TIMEOUT_IN_SECONDS = 2
IMDS_URL = "http://169.254.169.254/metadata/"
+IMDS_VER = "2019-06-01"
+IMDS_VER_PARAM = "api-version={}".format(IMDS_VER)
+
+
+class metadata_type(Enum):
+ compute = "{}instance?{}".format(IMDS_URL, IMDS_VER_PARAM)
+ network = "{}instance/network?{}".format(IMDS_URL,
+ IMDS_VER_PARAM)
+ reprovisiondata = "{}reprovisiondata?{}".format(IMDS_URL,
+ IMDS_VER_PARAM)
+
PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0"
@@ -434,20 +450,39 @@ class DataSourceAzure(sources.DataSource):
# need to look in the datadir and consider that valid
ddir = self.ds_cfg['data_dir']
+ # The order in which the candidates are inserted matters here, because
+ # it determines the value of ret. More specifically, the first one in
+ # the candidate list determines the path to take in order to get the
+ # metadata we need.
candidates = [self.seed_dir]
if os.path.isfile(REPROVISION_MARKER_FILE):
candidates.insert(0, "IMDS")
+ report_diagnostic_event("Reprovision marker file already present "
+ "before crawling Azure metadata: %s" %
+ REPROVISION_MARKER_FILE,
+ logger_func=LOG.debug)
+ elif os.path.isfile(REPROVISION_NIC_ATTACH_MARKER_FILE):
+ candidates.insert(0, "NIC_ATTACH_MARKER_PRESENT")
+ report_diagnostic_event("Reprovision nic attach marker file "
+ "already present before crawling Azure "
+ "metadata: %s" %
+ REPROVISION_NIC_ATTACH_MARKER_FILE,
+ logger_func=LOG.debug)
candidates.extend(list_possible_azure_ds_devs())
if ddir:
candidates.append(ddir)
found = None
reprovision = False
+ reprovision_after_nic_attach = False
for cdev in candidates:
try:
if cdev == "IMDS":
ret = None
reprovision = True
+ elif cdev == "NIC_ATTACH_MARKER_PRESENT":
+ ret = None
+ reprovision_after_nic_attach = True
elif cdev.startswith("/dev/"):
if util.is_FreeBSD():
ret = util.mount_cb(cdev, load_azure_ds_dir,
@@ -472,12 +507,19 @@ class DataSourceAzure(sources.DataSource):
continue
perform_reprovision = reprovision or self._should_reprovision(ret)
- if perform_reprovision:
+ perform_reprovision_after_nic_attach = (
+ reprovision_after_nic_attach or
+ self._should_reprovision_after_nic_attach(ret))
+
+ if perform_reprovision or perform_reprovision_after_nic_attach:
if util.is_FreeBSD():
msg = "Free BSD is not supported for PPS VMs"
report_diagnostic_event(msg, logger_func=LOG.error)
raise sources.InvalidMetaDataException(msg)
+ if perform_reprovision_after_nic_attach:
+ self._wait_for_all_nics_ready()
ret = self._reprovision()
+
imds_md = get_metadata_from_imds(
self.fallback_interface, retries=10)
(md, userdata_raw, cfg, files) = ret
@@ -508,7 +550,7 @@ class DataSourceAzure(sources.DataSource):
crawled_data['metadata']['random_seed'] = seed
crawled_data['metadata']['instance-id'] = self._iid()
- if perform_reprovision:
+ if perform_reprovision or perform_reprovision_after_nic_attach:
LOG.info("Reporting ready to Azure after getting ReprovisionData")
use_cached_ephemeral = (
self.distro.networking.is_up(self.fallback_interface) and
@@ -617,14 +659,12 @@ class DataSourceAzure(sources.DataSource):
LOG.debug('Retrieved SSH keys from IMDS')
except KeyError:
log_msg = 'Unable to get keys from IMDS, falling back to OVF'
- LOG.debug(log_msg)
report_diagnostic_event(log_msg, logger_func=LOG.debug)
try:
ssh_keys = self.metadata['public-keys']
LOG.debug('Retrieved keys from OVF')
except KeyError:
log_msg = 'No keys available from OVF'
- LOG.debug(log_msg)
report_diagnostic_event(log_msg, logger_func=LOG.debug)
return ssh_keys
@@ -660,10 +700,293 @@ class DataSourceAzure(sources.DataSource):
LOG.debug("negotiating already done for %s",
self.get_instance_id())
+ @azure_ds_telemetry_reporter
+ def _wait_for_nic_detach(self, nl_sock):
+ """Use the netlink socket provided to wait for nic detach event.
+ NOTE: The function doesn't close the socket. The caller owns closing
+ the socket and disposing it safely.
+ """
+ try:
+ ifname = None
+
+ # Preprovisioned VM will only have one NIC, and it gets
+ # detached immediately after deployment.
+ with events.ReportEventStack(
+ name="wait-for-nic-detach",
+ description=("wait for nic detach"),
+ parent=azure_ds_reporter):
+ ifname = netlink.wait_for_nic_detach_event(nl_sock)
+ if ifname is None:
+ msg = ("Preprovisioned nic not detached as expected. "
+ "Proceeding without failing.")
+ report_diagnostic_event(msg, logger_func=LOG.warning)
+ else:
+ report_diagnostic_event("The preprovisioned nic %s is detached"
+ % ifname, logger_func=LOG.warning)
+ path = REPROVISION_NIC_DETACHED_MARKER_FILE
+ LOG.info("Creating a marker file for nic detached: %s", path)
+ util.write_file(path, "{pid}: {time}\n".format(
+ pid=os.getpid(), time=time()))
+ except AssertionError as error:
+ report_diagnostic_event(error, logger_func=LOG.error)
+ raise
+
+ @azure_ds_telemetry_reporter
+ def wait_for_link_up(self, ifname):
+ """In cases where the link state is still showing down after a nic is
+ hot-attached, we can attempt to bring it up by forcing the hv_netvsc
+ drivers to query the link state by unbinding and then binding the
+ device. This function attempts infinitely until the link is up,
+ because we cannot proceed further until we have a stable link."""
+
+ if self.distro.networking.try_set_link_up(ifname):
+ report_diagnostic_event("The link %s is already up." % ifname,
+ logger_func=LOG.info)
+ return
+
+ LOG.info("Attempting to bring %s up", ifname)
+
+ attempts = 0
+ while True:
+
+ LOG.info("Unbinding and binding the interface %s", ifname)
+ devicename = net.read_sys_net(ifname,
+ 'device/device_id').strip('{}')
+ util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/unbind',
+ devicename)
+ util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/bind',
+ devicename)
+
+ attempts = attempts + 1
+ if self.distro.networking.try_set_link_up(ifname):
+ msg = "The link %s is up after %s attempts" % (ifname,
+ attempts)
+ report_diagnostic_event(msg, logger_func=LOG.info)
+ return
+
+ sleep_duration = 1
+ msg = ("Link is not up after %d attempts with %d seconds sleep "
+ "between attempts." % (attempts, sleep_duration))
+
+ if attempts % 10 == 0:
+ report_diagnostic_event(msg, logger_func=LOG.info)
+ else:
+ LOG.info(msg)
+
+ sleep(sleep_duration)
+
+ @azure_ds_telemetry_reporter
+ def _create_report_ready_marker(self):
+ path = REPORTED_READY_MARKER_FILE
+ LOG.info(
+ "Creating a marker file to report ready: %s", path)
+ util.write_file(path, "{pid}: {time}\n".format(
+ pid=os.getpid(), time=time()))
+ report_diagnostic_event(
+ 'Successfully created reported ready marker file '
+ 'while in the preprovisioning pool.',
+ logger_func=LOG.debug)
+
+ @azure_ds_telemetry_reporter
+ def _report_ready_if_needed(self):
+ """Report ready to the platform if the marker file is not present,
+ and create the marker file.
+ """
+ have_not_reported_ready = (
+ not os.path.isfile(REPORTED_READY_MARKER_FILE))
+
+ if have_not_reported_ready:
+ report_diagnostic_event("Reporting ready before nic detach",
+ logger_func=LOG.info)
+ try:
+ with EphemeralDHCPv4WithReporting(azure_ds_reporter) as lease:
+ self._report_ready(lease=lease)
+ except Exception as e:
+ report_diagnostic_event("Exception reporting ready during "
+ "preprovisioning before nic detach: %s"
+ % e, logger_func=LOG.error)
+ raise
+ self._create_report_ready_marker()
+ else:
+ report_diagnostic_event("Already reported ready before nic detach."
+ " The marker file already exists: %s" %
+ REPORTED_READY_MARKER_FILE,
+ logger_func=LOG.error)
+
+ @azure_ds_telemetry_reporter
+ def _check_if_nic_is_primary(self, ifname):
+ """Check if a given interface is the primary nic or not. If it is the
+ primary nic, then we also get the expected total nic count from IMDS.
+ IMDS will process the request and send a response only for primary NIC.
+ """
+ is_primary = False
+ expected_nic_count = -1
+ imds_md = None
+
+ # For now, only a VM's primary NIC can contact IMDS and WireServer. If
+ # DHCP fails for a NIC, we have no mechanism to determine if the NIC is
+ # primary or secondary. In this case, the desired behavior is to fail
+ # VM provisioning if there is any DHCP failure when trying to determine
+ # the primary NIC.
+ try:
+ with events.ReportEventStack(
+ name="obtain-dhcp-lease",
+ description=("obtain dhcp lease for %s when attempting to "
+ "determine primary NIC during reprovision of "
+ "a pre-provisioned VM" % ifname),
+ parent=azure_ds_reporter):
+ dhcp_ctx = EphemeralDHCPv4(
+ iface=ifname,
+ dhcp_log_func=dhcp_log_cb)
+ dhcp_ctx.obtain_lease()
+ except Exception as e:
+ report_diagnostic_event("Giving up. Failed to obtain dhcp lease "
+ "for %s when attempting to determine "
+ "primary NIC during reprovision due to %s"
+ % (ifname, e), logger_func=LOG.error)
+ raise
+
+ # Primary nic detection will be optimized in the future. The fact that
+ # primary nic is being attached first helps here. Otherwise each nic
+ # could add several seconds of delay.
+ try:
+ imds_md = get_metadata_from_imds(
+ ifname,
+ 5,
+ metadata_type.network)
+ except Exception as e:
+ LOG.warning(
+ "Failed to get network metadata using nic %s. Attempt to "
+ "contact IMDS failed with error %s. Assuming this is not the "
+ "primary nic.", ifname, e)
+ finally:
+ # If we are not the primary nic, then clean the dhcp context.
+ if imds_md is None:
+ dhcp_ctx.clean_network()
+
+ if imds_md is not None:
+ # Only primary NIC will get a response from IMDS.
+ LOG.info("%s is the primary nic", ifname)
+ is_primary = True
+
+ # If primary, set ephemeral dhcp ctx so we can report ready
+ self._ephemeral_dhcp_ctx = dhcp_ctx
+
+ # Set the expected nic count based on the response received.
+ expected_nic_count = len(
+ imds_md['interface'])
+ report_diagnostic_event("Expected nic count: %d" %
+ expected_nic_count, logger_func=LOG.info)
+
+ return is_primary, expected_nic_count
+
+ @azure_ds_telemetry_reporter
+ def _wait_for_hot_attached_nics(self, nl_sock):
+ """Wait until all the expected nics for the vm are hot-attached.
+ The expected nic count is obtained by requesting the network metadata
+ from IMDS.
+ """
+ LOG.info("Waiting for nics to be hot-attached")
+ try:
+ # Wait for nics to be attached one at a time, until we know for
+ # sure that all nics have been attached.
+ nics_found = []
+ primary_nic_found = False
+ expected_nic_count = -1
+
+ # Wait for netlink nic attach events. After the first nic is
+ # attached, we are already in the customer vm deployment path and
+ # so eerything from then on should happen fast and avoid
+ # unnecessary delays wherever possible.
+ while True:
+ ifname = None
+ with events.ReportEventStack(
+ name="wait-for-nic-attach",
+ description=("wait for nic attach after %d nics have "
+ "been attached" % len(nics_found)),
+ parent=azure_ds_reporter):
+ ifname = netlink.wait_for_nic_attach_event(nl_sock,
+ nics_found)
+
+ # wait_for_nic_attach_event guarantees that ifname it not None
+ nics_found.append(ifname)
+ report_diagnostic_event("Detected nic %s attached." % ifname,
+ logger_func=LOG.info)
+
+ # Attempt to bring the interface's operating state to
+ # UP in case it is not already.
+ self.wait_for_link_up(ifname)
+
+ # If primary nic is not found, check if this is it. The
+ # platform will attach the primary nic first so we
+ # won't be in primary_nic_found = false state for long.
+ if not primary_nic_found:
+ LOG.info("Checking if %s is the primary nic",
+ ifname)
+ (primary_nic_found, expected_nic_count) = (
+ self._check_if_nic_is_primary(ifname))
+
+ # Exit criteria: check if we've discovered all nics
+ if (expected_nic_count != -1
+ and len(nics_found) >= expected_nic_count):
+ LOG.info("Found all the nics for this VM.")
+ break
+
+ except AssertionError as error:
+ report_diagnostic_event(error, logger_func=LOG.error)
+
+ @azure_ds_telemetry_reporter
+ def _wait_for_all_nics_ready(self):
+ """Wait for nic(s) to be hot-attached. There may be multiple nics
+ depending on the customer request.
+ But only primary nic would be able to communicate with wireserver
+ and IMDS. So we detect and save the primary nic to be used later.
+ """
+
+ nl_sock = None
+ try:
+ nl_sock = netlink.create_bound_netlink_socket()
+
+ report_ready_marker_present = bool(
+ os.path.isfile(REPORTED_READY_MARKER_FILE))
+
+ # Report ready if the marker file is not already present.
+ # The nic of the preprovisioned vm gets hot-detached as soon as
+ # we report ready. So no need to save the dhcp context.
+ self._report_ready_if_needed()
+
+ has_nic_been_detached = bool(
+ os.path.isfile(REPROVISION_NIC_DETACHED_MARKER_FILE))
+
+ if not has_nic_been_detached:
+ LOG.info("NIC has not been detached yet.")
+ self._wait_for_nic_detach(nl_sock)
+
+ # If we know that the preprovisioned nic has been detached, and we
+ # still have a fallback nic, then it means the VM must have
+ # rebooted as part of customer assignment, and all the nics have
+ # already been attached by the Azure platform. So there is no need
+ # to wait for nics to be hot-attached.
+ if not self.fallback_interface:
+ self._wait_for_hot_attached_nics(nl_sock)
+ else:
+ report_diagnostic_event("Skipping waiting for nic attach "
+ "because we already have a fallback "
+ "interface. Report Ready marker "
+ "present before detaching nics: %s" %
+ report_ready_marker_present,
+ logger_func=LOG.info)
+ except netlink.NetlinkCreateSocketError as e:
+ report_diagnostic_event(e, logger_func=LOG.warning)
+ raise
+ finally:
+ if nl_sock:
+ nl_sock.close()
+
def _poll_imds(self):
"""Poll IMDS for the new provisioning data until we get a valid
response. Then return the returned JSON object."""
- url = IMDS_URL + "reprovisiondata?api-version=2017-04-02"
+ url = metadata_type.reprovisiondata.value
headers = {"Metadata": "true"}
nl_sock = None
report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE))
@@ -705,17 +1028,31 @@ class DataSourceAzure(sources.DataSource):
logger_func=LOG.warning)
return False
- LOG.debug("Wait for vnetswitch to happen")
+ # When the interface is hot-attached, we would have already
+ # done dhcp and set the dhcp context. In that case, skip
+ # the attempt to do dhcp.
+ is_ephemeral_ctx_present = self._ephemeral_dhcp_ctx is not None
+ msg = ("Unexpected error. Dhcp context is not expected to be already "
+ "set when we need to wait for vnet switch")
+ if is_ephemeral_ctx_present and report_ready:
+ report_diagnostic_event(msg, logger_func=LOG.error)
+ raise RuntimeError(msg)
+
while True:
try:
- # Save our EphemeralDHCPv4 context to avoid repeated dhcp
- with events.ReportEventStack(
- name="obtain-dhcp-lease",
- description="obtain dhcp lease",
- parent=azure_ds_reporter):
- self._ephemeral_dhcp_ctx = EphemeralDHCPv4(
- dhcp_log_func=dhcp_log_cb)
- lease = self._ephemeral_dhcp_ctx.obtain_lease()
+ # Since is_ephemeral_ctx_present is set only once, this ensures
+ # that with regular reprovisioning, dhcp is always done every
+ # time the loop runs.
+ if not is_ephemeral_ctx_present:
+ # Save our EphemeralDHCPv4 context to avoid repeated dhcp
+ # later when we report ready
+ with events.ReportEventStack(
+ name="obtain-dhcp-lease",
+ description="obtain dhcp lease",
+ parent=azure_ds_reporter):
+ self._ephemeral_dhcp_ctx = EphemeralDHCPv4(
+ dhcp_log_func=dhcp_log_cb)
+ lease = self._ephemeral_dhcp_ctx.obtain_lease()
if vnet_switched:
dhcp_attempts += 1
@@ -737,17 +1074,10 @@ class DataSourceAzure(sources.DataSource):
self._ephemeral_dhcp_ctx.clean_network()
raise sources.InvalidMetaDataException(msg)
- path = REPORTED_READY_MARKER_FILE
- LOG.info(
- "Creating a marker file to report ready: %s", path)
- util.write_file(path, "{pid}: {time}\n".format(
- pid=os.getpid(), time=time()))
- report_diagnostic_event(
- 'Successfully created reported ready marker file '
- 'while in the preprovisioning pool.',
- logger_func=LOG.debug)
+ self._create_report_ready_marker()
report_ready = False
+ LOG.debug("Wait for vnetswitch to happen")
with events.ReportEventStack(
name="wait-for-media-disconnect-connect",
description="wait for vnet switch",
@@ -863,6 +1193,35 @@ class DataSourceAzure(sources.DataSource):
"connectivity issues: %s" % e, logger_func=LOG.warning)
return False
+ def _should_reprovision_after_nic_attach(self, candidate_metadata) -> bool:
+ """Whether or not we should wait for nic attach and then poll
+ IMDS for reprovisioning data. Also sets a marker file to poll IMDS.
+
+ The marker file is used for the following scenario: the VM boots into
+ wait for nic attach, which we expect to be proceeding infinitely until
+ the nic is attached. If for whatever reason the platform moves us to a
+ new host (for instance a hardware issue), we need to keep waiting.
+ However, since the VM reports ready to the Fabric, we will not attach
+ the ISO, thus cloud-init needs to have a way of knowing that it should
+ jump back into the waiting mode in order to retrieve the ovf_env.
+
+ @param candidate_metadata: Metadata obtained from reading ovf-env.
+ @return: Whether to reprovision after waiting for nics to be attached.
+ """
+ if not candidate_metadata:
+ return False
+ (_md, _userdata_raw, cfg, _files) = candidate_metadata
+ path = REPROVISION_NIC_ATTACH_MARKER_FILE
+ if (cfg.get('PreprovisionedVMType', None) == "Savable" or
+ os.path.isfile(path)):
+ if not os.path.isfile(path):
+ LOG.info("Creating a marker file to wait for nic attach: %s",
+ path)
+ util.write_file(path, "{pid}: {time}\n".format(
+ pid=os.getpid(), time=time()))
+ return True
+ return False
+
def _should_reprovision(self, ret):
"""Whether or not we should poll IMDS for reprovisioning data.
Also sets a marker file to poll IMDS.
@@ -879,6 +1238,7 @@ class DataSourceAzure(sources.DataSource):
(_md, _userdata_raw, cfg, _files) = ret
path = REPROVISION_MARKER_FILE
if (cfg.get('PreprovisionedVm') is True or
+ cfg.get('PreprovisionedVMType', None) == 'Running' or
os.path.isfile(path)):
if not os.path.isfile(path):
LOG.info("Creating a marker file to poll imds: %s",
@@ -944,6 +1304,8 @@ class DataSourceAzure(sources.DataSource):
util.del_file(REPORTED_READY_MARKER_FILE)
util.del_file(REPROVISION_MARKER_FILE)
+ util.del_file(REPROVISION_NIC_ATTACH_MARKER_FILE)
+ util.del_file(REPROVISION_NIC_DETACHED_MARKER_FILE)
return fabric_data
@azure_ds_telemetry_reporter
@@ -1399,34 +1761,109 @@ def read_azure_ovf(contents):
if 'ssh_pwauth' not in cfg and password:
cfg['ssh_pwauth'] = True
- cfg['PreprovisionedVm'] = _extract_preprovisioned_vm_setting(dom)
+ preprovisioning_cfg = _get_preprovisioning_cfgs(dom)
+ cfg = util.mergemanydict([cfg, preprovisioning_cfg])
return (md, ud, cfg)
@azure_ds_telemetry_reporter
-def _extract_preprovisioned_vm_setting(dom):
- """Read the preprovision flag from the ovf. It should not
- exist unless true."""
+def _get_preprovisioning_cfgs(dom):
+ """Read the preprovisioning related flags from ovf and populates a dict
+ with the info.
+
+ Two flags are in use today: PreprovisionedVm bool and
+ PreprovisionedVMType enum. In the long term, the PreprovisionedVm bool
+ will be deprecated in favor of PreprovisionedVMType string/enum.
+
+ Only these combinations of values are possible today:
+ - PreprovisionedVm=True and PreprovisionedVMType=Running
+ - PreprovisionedVm=False and PreprovisionedVMType=Savable
+ - PreprovisionedVm is missing and PreprovisionedVMType=Running/Savable
+ - PreprovisionedVm=False and PreprovisionedVMType is missing
+
+ More specifically, this will never happen:
+ - PreprovisionedVm=True and PreprovisionedVMType=Savable
+ """
+ cfg = {
+ "PreprovisionedVm": False,
+ "PreprovisionedVMType": None
+ }
+
platform_settings_section = find_child(
dom.documentElement,
lambda n: n.localName == "PlatformSettingsSection")
if not platform_settings_section or len(platform_settings_section) == 0:
LOG.debug("PlatformSettingsSection not found")
- return False
+ return cfg
platform_settings = find_child(
platform_settings_section[0],
lambda n: n.localName == "PlatformSettings")
if not platform_settings or len(platform_settings) == 0:
LOG.debug("PlatformSettings not found")
- return False
- preprovisionedVm = find_child(
+ return cfg
+
+ # Read the PreprovisionedVm bool flag. This should be deprecated when the
+ # platform has removed PreprovisionedVm and only surfaces
+ # PreprovisionedVMType.
+ cfg["PreprovisionedVm"] = _get_preprovisionedvm_cfg_value(
+ platform_settings)
+
+ cfg["PreprovisionedVMType"] = _get_preprovisionedvmtype_cfg_value(
+ platform_settings)
+ return cfg
+
+
+@azure_ds_telemetry_reporter
+def _get_preprovisionedvm_cfg_value(platform_settings):
+ preprovisionedVm = False
+
+ # Read the PreprovisionedVm bool flag. This should be deprecated when the
+ # platform has removed PreprovisionedVm and only surfaces
+ # PreprovisionedVMType.
+ preprovisionedVmVal = find_child(
platform_settings[0],
lambda n: n.localName == "PreprovisionedVm")
- if not preprovisionedVm or len(preprovisionedVm) == 0:
+ if not preprovisionedVmVal or len(preprovisionedVmVal) == 0:
LOG.debug("PreprovisionedVm not found")
- return False
- return util.translate_bool(preprovisionedVm[0].firstChild.nodeValue)
+ return preprovisionedVm
+ preprovisionedVm = util.translate_bool(
+ preprovisionedVmVal[0].firstChild.nodeValue)
+
+ report_diagnostic_event(
+ "PreprovisionedVm: %s" % preprovisionedVm, logger_func=LOG.info)
+
+ return preprovisionedVm
+
+
+@azure_ds_telemetry_reporter
+def _get_preprovisionedvmtype_cfg_value(platform_settings):
+ preprovisionedVMType = None
+
+ # Read the PreprovisionedVMType value from the ovf. It can be
+ # 'Running' or 'Savable' or not exist. This enum value is intended to
+ # replace PreprovisionedVm bool flag in the long term.
+ # A Running VM is the same as preprovisioned VMs of today. This is
+ # equivalent to having PreprovisionedVm=True.
+ # A Savable VM is one whose nic is hot-detached immediately after it
+ # reports ready the first time to free up the network resources.
+ # Once assigned to customer, the customer-requested nics are
+ # hot-attached to it and reprovision happens like today.
+ preprovisionedVMTypeVal = find_child(
+ platform_settings[0],
+ lambda n: n.localName == "PreprovisionedVMType")
+ if (not preprovisionedVMTypeVal or len(preprovisionedVMTypeVal) == 0 or
+ preprovisionedVMTypeVal[0].firstChild is None):
+ LOG.debug("PreprovisionedVMType not found")
+ return preprovisionedVMType
+
+ preprovisionedVMType = preprovisionedVMTypeVal[0].firstChild.nodeValue
+
+ report_diagnostic_event(
+ "PreprovisionedVMType: %s" % preprovisionedVMType,
+ logger_func=LOG.info)
+
+ return preprovisionedVMType
def encrypt_pass(password, salt_id="$6$"):
@@ -1588,8 +2025,10 @@ def _generate_network_config_from_fallback_config() -> dict:
@azure_ds_telemetry_reporter
-def get_metadata_from_imds(fallback_nic, retries):
- """Query Azure's network metadata service, returning a dictionary.
+def get_metadata_from_imds(fallback_nic,
+ retries,
+ md_type=metadata_type.compute):
+ """Query Azure's instance metadata service, returning a dictionary.
If network is not up, setup ephemeral dhcp on fallback_nic to talk to the
IMDS. For more info on IMDS:
@@ -1604,7 +2043,7 @@ def get_metadata_from_imds(fallback_nic, retries):
"""
kwargs = {'logfunc': LOG.debug,
'msg': 'Crawl of Azure Instance Metadata Service (IMDS)',
- 'func': _get_metadata_from_imds, 'args': (retries,)}
+ 'func': _get_metadata_from_imds, 'args': (retries, md_type,)}
if net.is_up(fallback_nic):
return util.log_time(**kwargs)
else:
@@ -1620,9 +2059,9 @@ def get_metadata_from_imds(fallback_nic, retries):
@azure_ds_telemetry_reporter
-def _get_metadata_from_imds(retries):
+def _get_metadata_from_imds(retries, md_type=metadata_type.compute):
- url = IMDS_URL + "instance?api-version=2019-06-01"
+ url = md_type.value
headers = {"Metadata": "true"}
try:
response = readurl(
diff --git a/cloudinit/sources/helpers/netlink.py b/cloudinit/sources/helpers/netlink.py
index c2ad587b..e13d6834 100644
--- a/cloudinit/sources/helpers/netlink.py
+++ b/cloudinit/sources/helpers/netlink.py
@@ -185,6 +185,54 @@ def read_rta_oper_state(data):
return InterfaceOperstate(ifname, operstate)
+def wait_for_nic_attach_event(netlink_socket, existing_nics):
+ '''Block until a single nic is attached.
+
+ :param: netlink_socket: netlink_socket to receive events
+ :param: existing_nics: List of existing nics so that we can skip them.
+ :raises: AssertionError if netlink_socket is none.
+ '''
+ LOG.debug("Preparing to wait for nic attach.")
+ ifname = None
+
+ def should_continue_cb(iname, carrier, prevCarrier):
+ if iname in existing_nics:
+ return True
+ nonlocal ifname
+ ifname = iname
+ return False
+
+ # We can return even if the operational state of the new nic is DOWN
+ # because we set it to UP before doing dhcp.
+ read_netlink_messages(netlink_socket,
+ None,
+ [RTM_NEWLINK],
+ [OPER_UP, OPER_DOWN],
+ should_continue_cb)
+ return ifname
+
+
+def wait_for_nic_detach_event(netlink_socket):
+ '''Block until a single nic is detached and its operational state is down.
+
+ :param: netlink_socket: netlink_socket to receive events.
+ '''
+ LOG.debug("Preparing to wait for nic detach.")
+ ifname = None
+
+ def should_continue_cb(iname, carrier, prevCarrier):
+ nonlocal ifname
+ ifname = iname
+ return False
+
+ read_netlink_messages(netlink_socket,
+ None,
+ [RTM_DELLINK],
+ [OPER_DOWN],
+ should_continue_cb)
+ return ifname
+
+
def wait_for_media_disconnect_connect(netlink_socket, ifname):
'''Block until media disconnect and connect has happened on an interface.
Listens on netlink socket to receive netlink events and when the carrier
@@ -198,10 +246,42 @@ def wait_for_media_disconnect_connect(netlink_socket, ifname):
assert (netlink_socket is not None), ("netlink socket is none")
assert (ifname is not None), ("interface name is none")
assert (len(ifname) > 0), ("interface name cannot be empty")
+
+ def should_continue_cb(iname, carrier, prevCarrier):
+ # check for carrier down, up sequence
+ isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP)
+ if isVnetSwitch:
+ LOG.debug("Media switch happened on %s.", ifname)
+ return False
+ return True
+
+ LOG.debug("Wait for media disconnect and reconnect to happen")
+ read_netlink_messages(netlink_socket,
+ ifname,
+ [RTM_NEWLINK, RTM_DELLINK],
+ [OPER_UP, OPER_DOWN],
+ should_continue_cb)
+
+
+def read_netlink_messages(netlink_socket,
+ ifname_filter,
+ rtm_types,
+ operstates,
+ should_continue_callback):
+ ''' Reads from the netlink socket until the condition specified by
+ the continuation callback is met.
+
+ :param: netlink_socket: netlink_socket to receive events.
+ :param: ifname_filter: if not None, will only listen for this interface.
+ :param: rtm_types: Type of netlink events to listen for.
+ :param: operstates: Operational states to listen.
+ :param: should_continue_callback: Specifies when to stop listening.
+ '''
+ if netlink_socket is None:
+ raise RuntimeError("Netlink socket is none")
+ data = bytes()
carrier = OPER_UP
prevCarrier = OPER_UP
- data = bytes()
- LOG.debug("Wait for media disconnect and reconnect to happen")
while True:
recv_data = read_netlink_socket(netlink_socket, SELECT_TIMEOUT)
if recv_data is None:
@@ -223,26 +303,26 @@ def wait_for_media_disconnect_connect(netlink_socket, ifname):
padlen = (nlheader.length+PAD_ALIGNMENT-1) & ~(PAD_ALIGNMENT-1)
offset = offset + padlen
LOG.debug('offset to next netlink message: %d', offset)
- # Ignore any messages not new link or del link
- if nlheader.type not in [RTM_NEWLINK, RTM_DELLINK]:
+ # Continue if we are not interested in this message.
+ if nlheader.type not in rtm_types:
continue
interface_state = read_rta_oper_state(nl_msg)
if interface_state is None:
LOG.debug('Failed to read rta attributes: %s', interface_state)
continue
- if interface_state.ifname != ifname:
+ if (ifname_filter is not None and
+ interface_state.ifname != ifname_filter):
LOG.debug(
"Ignored netlink event on interface %s. Waiting for %s.",
- interface_state.ifname, ifname)
+ interface_state.ifname, ifname_filter)
continue
- if interface_state.operstate not in [OPER_UP, OPER_DOWN]:
+ if interface_state.operstate not in operstates:
continue
prevCarrier = carrier
carrier = interface_state.operstate
- # check for carrier down, up sequence
- isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP)
- if isVnetSwitch:
- LOG.debug("Media switch happened on %s.", ifname)
+ if not should_continue_callback(interface_state.ifname,
+ carrier,
+ prevCarrier):
return
data = data[offset:]
diff --git a/cloudinit/sources/helpers/tests/test_netlink.py b/cloudinit/sources/helpers/tests/test_netlink.py
index 10760bd6..cafe3961 100644
--- a/cloudinit/sources/helpers/tests/test_netlink.py
+++ b/cloudinit/sources/helpers/tests/test_netlink.py
@@ -9,9 +9,10 @@ import codecs
from cloudinit.sources.helpers.netlink import (
NetlinkCreateSocketError, create_bound_netlink_socket, read_netlink_socket,
read_rta_oper_state, unpack_rta_attr, wait_for_media_disconnect_connect,
+ wait_for_nic_attach_event, wait_for_nic_detach_event,
OPER_DOWN, OPER_UP, OPER_DORMANT, OPER_LOWERLAYERDOWN, OPER_NOTPRESENT,
- OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_SETLINK,
- RTM_GETLINK, MAX_SIZE)
+ OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_DELLINK,
+ RTM_SETLINK, RTM_GETLINK, MAX_SIZE)
def int_to_bytes(i):
@@ -135,6 +136,75 @@ class TestParseNetlinkMessage(CiTestCase):
@mock.patch('cloudinit.sources.helpers.netlink.socket.socket')
@mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket')
+class TestNicAttachDetach(CiTestCase):
+ with_logs = True
+
+ def _media_switch_data(self, ifname, msg_type, operstate):
+ '''construct netlink data with specified fields'''
+ if ifname and operstate is not None:
+ data = bytearray(48)
+ bytes = ifname.encode("utf-8")
+ struct.pack_into("HH4sHHc", data, RTATTR_START_OFFSET, 8, 3,
+ bytes, 5, 16, int_to_bytes(operstate))
+ elif ifname:
+ data = bytearray(40)
+ bytes = ifname.encode("utf-8")
+ struct.pack_into("HH4s", data, RTATTR_START_OFFSET, 8, 3, bytes)
+ elif operstate:
+ data = bytearray(40)
+ struct.pack_into("HHc", data, RTATTR_START_OFFSET, 5, 16,
+ int_to_bytes(operstate))
+ struct.pack_into("=LHHLL", data, 0, len(data), msg_type, 0, 0, 0)
+ return data
+
+ def test_nic_attached_oper_down(self, m_read_netlink_socket, m_socket):
+ '''Test for a new nic attached'''
+ ifname = "eth0"
+ data_op_down = self._media_switch_data(ifname, RTM_NEWLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_op_down]
+ ifread = wait_for_nic_attach_event(m_socket, [])
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual(ifname, ifread)
+
+ def test_nic_attached_oper_up(self, m_read_netlink_socket, m_socket):
+ '''Test for a new nic attached'''
+ ifname = "eth0"
+ data_op_up = self._media_switch_data(ifname, RTM_NEWLINK, OPER_UP)
+ m_read_netlink_socket.side_effect = [data_op_up]
+ ifread = wait_for_nic_attach_event(m_socket, [])
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual(ifname, ifread)
+
+ def test_nic_attach_ignore_existing(self, m_read_netlink_socket, m_socket):
+ '''Test that we read only the interfaces we are interested in.'''
+ data_eth0 = self._media_switch_data("eth0", RTM_NEWLINK, OPER_DOWN)
+ data_eth1 = self._media_switch_data("eth1", RTM_NEWLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_eth0, data_eth1]
+ ifread = wait_for_nic_attach_event(m_socket, ["eth0"])
+ self.assertEqual(m_read_netlink_socket.call_count, 2)
+ self.assertEqual("eth1", ifread)
+
+ def test_nic_attach_read_first(self, m_read_netlink_socket, m_socket):
+ '''Test that we read only the interfaces we are interested in.'''
+ data_eth0 = self._media_switch_data("eth0", RTM_NEWLINK, OPER_DOWN)
+ data_eth1 = self._media_switch_data("eth1", RTM_NEWLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_eth0, data_eth1]
+ ifread = wait_for_nic_attach_event(m_socket, ["eth1"])
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual("eth0", ifread)
+
+ def test_nic_detached(self, m_read_netlink_socket, m_socket):
+ '''Test for an existing nic detached'''
+ ifname = "eth0"
+ data_op_down = self._media_switch_data(ifname, RTM_DELLINK, OPER_DOWN)
+ m_read_netlink_socket.side_effect = [data_op_down]
+ ifread = wait_for_nic_detach_event(m_socket)
+ self.assertEqual(m_read_netlink_socket.call_count, 1)
+ self.assertEqual(ifname, ifread)
+
+
+@mock.patch('cloudinit.sources.helpers.netlink.socket.socket')
+@mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket')
class TestWaitForMediaDisconnectConnect(CiTestCase):
with_logs = True
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
index 534314aa..e363c1f9 100644
--- a/tests/unittests/test_datasource/test_azure.py
+++ b/tests/unittests/test_datasource/test_azure.py
@@ -11,6 +11,7 @@ from cloudinit.version import version_string as vs
from cloudinit.tests.helpers import (
HttprettyTestCase, CiTestCase, populate_dir, mock, wrap_and_call,
ExitStack, resourceLocation)
+from cloudinit.sources.helpers import netlink
import copy
import crypt
@@ -78,6 +79,8 @@ def construct_valid_ovf_env(data=None, pubkeys=None,
if platform_settings:
for k, v in platform_settings.items():
content += "<%s>%s</%s>\n" % (k, v, k)
+ if "PreprovisionedVMType" not in platform_settings:
+ content += """<PreprovisionedVMType i:nil="true" />"""
content += """</PlatformSettings></wa:PlatformSettingsSection>
</Environment>"""
@@ -156,6 +159,31 @@ SECONDARY_INTERFACE = {
}
}
+IMDS_NETWORK_METADATA = {
+ "interface": [
+ {
+ "macAddress": "000D3A047598",
+ "ipv6": {
+ "ipAddress": []
+ },
+ "ipv4": {
+ "subnet": [
+ {
+ "prefix": "24",
+ "address": "10.0.0.0"
+ }
+ ],
+ "ipAddress": [
+ {
+ "privateIpAddress": "10.0.0.4",
+ "publicIpAddress": "104.46.124.81"
+ }
+ ]
+ }
+ }
+ ]
+}
+
MOCKPATH = 'cloudinit.sources.DataSourceAzure.'
@@ -366,8 +394,8 @@ class TestGetMetadataFromIMDS(HttprettyTestCase):
self.network_md_url = dsaz.IMDS_URL + "instance?api-version=2019-06-01"
@mock.patch(MOCKPATH + 'readurl')
- @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
- @mock.patch(MOCKPATH + 'net.is_up')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4', autospec=True)
+ @mock.patch(MOCKPATH + 'net.is_up', autospec=True)
def test_get_metadata_does_not_dhcp_if_network_is_up(
self, m_net_is_up, m_dhcp, m_readurl):
"""Do not perform DHCP setup when nic is already up."""
@@ -384,9 +412,66 @@ class TestGetMetadataFromIMDS(HttprettyTestCase):
"Crawl of Azure Instance Metadata Service (IMDS) took", # log_time
self.logs.getvalue())
- @mock.patch(MOCKPATH + 'readurl')
- @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting')
+ @mock.patch(MOCKPATH + 'readurl', autospec=True)
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
+ @mock.patch(MOCKPATH + 'net.is_up')
+ def test_get_compute_metadata_uses_compute_url(
+ self, m_net_is_up, m_dhcp, m_readurl):
+ """Make sure readurl is called with the correct url when accessing
+ network metadata"""
+ m_net_is_up.return_value = True
+ m_readurl.return_value = url_helper.StringResponse(
+ json.dumps(IMDS_NETWORK_METADATA).encode('utf-8'))
+
+ dsaz.get_metadata_from_imds(
+ 'eth0', retries=3, md_type=dsaz.metadata_type.compute)
+ m_readurl.assert_called_with(
+ "http://169.254.169.254/metadata/instance?api-version="
+ "2019-06-01", exception_cb=mock.ANY,
+ headers=mock.ANY, retries=mock.ANY,
+ timeout=mock.ANY)
+
+ @mock.patch(MOCKPATH + 'readurl', autospec=True)
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
+ @mock.patch(MOCKPATH + 'net.is_up')
+ def test_get_network_metadata_uses_network_url(
+ self, m_net_is_up, m_dhcp, m_readurl):
+ """Make sure readurl is called with the correct url when accessing
+ network metadata"""
+ m_net_is_up.return_value = True
+ m_readurl.return_value = url_helper.StringResponse(
+ json.dumps(IMDS_NETWORK_METADATA).encode('utf-8'))
+
+ dsaz.get_metadata_from_imds(
+ 'eth0', retries=3, md_type=dsaz.metadata_type.network)
+ m_readurl.assert_called_with(
+ "http://169.254.169.254/metadata/instance/network?api-version="
+ "2019-06-01", exception_cb=mock.ANY,
+ headers=mock.ANY, retries=mock.ANY,
+ timeout=mock.ANY)
+
+ @mock.patch(MOCKPATH + 'readurl', autospec=True)
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
@mock.patch(MOCKPATH + 'net.is_up')
+ def test_get_default_metadata_uses_compute_url(
+ self, m_net_is_up, m_dhcp, m_readurl):
+ """Make sure readurl is called with the correct url when accessing
+ network metadata"""
+ m_net_is_up.return_value = True
+ m_readurl.return_value = url_helper.StringResponse(
+ json.dumps(IMDS_NETWORK_METADATA).encode('utf-8'))
+
+ dsaz.get_metadata_from_imds(
+ 'eth0', retries=3)
+ m_readurl.assert_called_with(
+ "http://169.254.169.254/metadata/instance?api-version="
+ "2019-06-01", exception_cb=mock.ANY,
+ headers=mock.ANY, retries=mock.ANY,
+ timeout=mock.ANY)
+
+ @mock.patch(MOCKPATH + 'readurl', autospec=True)
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting', autospec=True)
+ @mock.patch(MOCKPATH + 'net.is_up', autospec=True)
def test_get_metadata_performs_dhcp_when_network_is_down(
self, m_net_is_up, m_dhcp, m_readurl):
"""Perform DHCP setup when nic is not up."""
@@ -410,7 +495,7 @@ class TestGetMetadataFromIMDS(HttprettyTestCase):
timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS)
@mock.patch('cloudinit.url_helper.time.sleep')
- @mock.patch(MOCKPATH + 'net.is_up')
+ @mock.patch(MOCKPATH + 'net.is_up', autospec=True)
def test_get_metadata_from_imds_empty_when_no_imds_present(
self, m_net_is_up, m_sleep):
"""Return empty dict when IMDS network metadata is absent."""
@@ -431,7 +516,7 @@ class TestGetMetadataFromIMDS(HttprettyTestCase):
@mock.patch('requests.Session.request')
@mock.patch('cloudinit.url_helper.time.sleep')
- @mock.patch(MOCKPATH + 'net.is_up')
+ @mock.patch(MOCKPATH + 'net.is_up', autospec=True)
def test_get_metadata_from_imds_retries_on_timeout(
self, m_net_is_up, m_sleep, m_request):
"""Retry IMDS network metadata on timeout errors."""
@@ -801,6 +886,7 @@ scbus-1 on xpt0 bus 0
'sys_cfg': {}}
dsrc = self._get_ds(data)
expected_cfg = {
+ 'PreprovisionedVMType': None,
'PreprovisionedVm': False,
'datasource': {'Azure': {'agent_command': 'my_command'}},
'system_info': {'default_user': {'name': u'myuser'}}}
@@ -864,6 +950,66 @@ scbus-1 on xpt0 bus 0
dsrc.crawl_metadata()
self.assertEqual(1, m_report_ready.call_count)
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.EphemeralDHCPv4WithReporting')
+ @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file')
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready')
+ @mock.patch('cloudinit.sources.DataSourceAzure.DataSourceAzure._poll_imds')
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.DataSourceAzure.'
+ '_wait_for_all_nics_ready')
+ def test_crawl_metadata_waits_for_nic_on_savable_vms(
+ self, detect_nics, poll_imds_func, report_ready_func, m_write, m_dhcp
+ ):
+ """If reprovisioning, report ready at the end"""
+ ovfenv = construct_valid_ovf_env(
+ platform_settings={"PreprovisionedVMType": "Savable",
+ "PreprovisionedVm": "True"}
+ )
+
+ data = {
+ 'ovfcontent': ovfenv,
+ 'sys_cfg': {}
+ }
+ dsrc = self._get_ds(data)
+ poll_imds_func.return_value = ovfenv
+ dsrc.crawl_metadata()
+ self.assertEqual(1, report_ready_func.call_count)
+ self.assertEqual(1, detect_nics.call_count)
+
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.EphemeralDHCPv4WithReporting')
+ @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file')
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready')
+ @mock.patch('cloudinit.sources.DataSourceAzure.DataSourceAzure._poll_imds')
+ @mock.patch(
+ 'cloudinit.sources.DataSourceAzure.DataSourceAzure.'
+ '_wait_for_all_nics_ready')
+ @mock.patch('os.path.isfile')
+ def test_detect_nics_when_marker_present(
+ self, is_file, detect_nics, poll_imds_func, report_ready_func, m_write,
+ m_dhcp):
+ """If reprovisioning, wait for nic attach if marker present"""
+
+ def is_file_ret(key):
+ return key == dsaz.REPROVISION_NIC_ATTACH_MARKER_FILE
+
+ is_file.side_effect = is_file_ret
+ ovfenv = construct_valid_ovf_env()
+
+ data = {
+ 'ovfcontent': ovfenv,
+ 'sys_cfg': {}
+ }
+
+ dsrc = self._get_ds(data)
+ poll_imds_func.return_value = ovfenv
+ dsrc.crawl_metadata()
+ self.assertEqual(1, report_ready_func.call_count)
+ self.assertEqual(1, detect_nics.call_count)
+
@mock.patch('cloudinit.sources.DataSourceAzure.util.write_file')
@mock.patch('cloudinit.sources.helpers.netlink.'
'wait_for_media_disconnect_connect')
@@ -1526,7 +1672,7 @@ scbus-1 on xpt0 bus 0
@mock.patch('cloudinit.net.get_interface_mac')
@mock.patch('cloudinit.net.get_devicelist')
@mock.patch('cloudinit.net.device_driver')
- @mock.patch('cloudinit.net.generate_fallback_config')
+ @mock.patch('cloudinit.net.generate_fallback_config', autospec=True)
def test_fallback_network_config(self, mock_fallback, mock_dd,
mock_devlist, mock_get_mac):
"""On absent IMDS network data, generate network fallback config."""
@@ -1561,7 +1707,7 @@ scbus-1 on xpt0 bus 0
blacklist_drivers=['mlx4_core', 'mlx5_core'],
config_driver=True)
- @mock.patch(MOCKPATH + 'net.get_interfaces')
+ @mock.patch(MOCKPATH + 'net.get_interfaces', autospec=True)
@mock.patch(MOCKPATH + 'util.is_FreeBSD')
def test_blacklist_through_distro(
self, m_is_freebsd, m_net_get_interfaces):
@@ -1583,17 +1729,17 @@ scbus-1 on xpt0 bus 0
m_net_get_interfaces.assert_called_with(
blacklist_drivers=dsaz.BLACKLIST_DRIVERS)
- @mock.patch(MOCKPATH + 'subp.subp')
+ @mock.patch(MOCKPATH + 'subp.subp', autospec=True)
def test_get_hostname_with_no_args(self, m_subp):
dsaz.get_hostname()
m_subp.assert_called_once_with(("hostname",), capture=True)
- @mock.patch(MOCKPATH + 'subp.subp')
+ @mock.patch(MOCKPATH + 'subp.subp', autospec=True)
def test_get_hostname_with_string_arg(self, m_subp):
dsaz.get_hostname(hostname_command="hostname")
m_subp.assert_called_once_with(("hostname",), capture=True)
- @mock.patch(MOCKPATH + 'subp.subp')
+ @mock.patch(MOCKPATH + 'subp.subp', autospec=True)
def test_get_hostname_with_iterable_arg(self, m_subp):
dsaz.get_hostname(hostname_command=("hostname",))
m_subp.assert_called_once_with(("hostname",), capture=True)
@@ -2224,6 +2370,29 @@ class TestPreprovisioningReadAzureOvfFlag(CiTestCase):
ret = dsaz.read_azure_ovf(content)
cfg = ret[2]
self.assertFalse(cfg['PreprovisionedVm'])
+ self.assertEqual(None, cfg["PreprovisionedVMType"])
+
+ def test_read_azure_ovf_with_running_type(self):
+ """The read_azure_ovf method should set PreprovisionedVMType
+ cfg flag to Running."""
+ content = construct_valid_ovf_env(
+ platform_settings={"PreprovisionedVMType": "Running",
+ "PreprovisionedVm": "True"})
+ ret = dsaz.read_azure_ovf(content)
+ cfg = ret[2]
+ self.assertTrue(cfg['PreprovisionedVm'])
+ self.assertEqual("Running", cfg['PreprovisionedVMType'])
+
+ def test_read_azure_ovf_with_savable_type(self):
+ """The read_azure_ovf method should set PreprovisionedVMType
+ cfg flag to Savable."""
+ content = construct_valid_ovf_env(
+ platform_settings={"PreprovisionedVMType": "Savable",
+ "PreprovisionedVm": "True"})
+ ret = dsaz.read_azure_ovf(content)
+ cfg = ret[2]
+ self.assertTrue(cfg['PreprovisionedVm'])
+ self.assertEqual("Savable", cfg['PreprovisionedVMType'])
@mock.patch('os.path.isfile')
@@ -2273,6 +2442,227 @@ class TestPreprovisioningShouldReprovision(CiTestCase):
_poll_imds.assert_called_with()
+class TestPreprovisioningHotAttachNics(CiTestCase):
+
+ def setUp(self):
+ super(TestPreprovisioningHotAttachNics, self).setUp()
+ self.tmp = self.tmp_dir()
+ self.waagent_d = self.tmp_path('/var/lib/waagent', self.tmp)
+ self.paths = helpers.Paths({'cloud_dir': self.tmp})
+ dsaz.BUILTIN_DS_CONFIG['data_dir'] = self.waagent_d
+ self.paths = helpers.Paths({'cloud_dir': self.tmp})
+
+ @mock.patch('cloudinit.sources.helpers.netlink.wait_for_nic_detach_event',
+ autospec=True)
+ @mock.patch(MOCKPATH + 'util.write_file', autospec=True)
+ def test_nic_detach_writes_marker(self, m_writefile, m_detach):
+ """When we detect that a nic gets detached, we write a marker for it"""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+ nl_sock = mock.MagicMock()
+ dsa._wait_for_nic_detach(nl_sock)
+ m_detach.assert_called_with(nl_sock)
+ self.assertEqual(1, m_detach.call_count)
+ m_writefile.assert_called_with(
+ dsaz.REPROVISION_NIC_DETACHED_MARKER_FILE, mock.ANY)
+
+ @mock.patch(MOCKPATH + 'util.write_file', autospec=True)
+ @mock.patch(MOCKPATH + 'DataSourceAzure.fallback_interface')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._report_ready')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach')
+ def test_detect_nic_attach_reports_ready_and_waits_for_detach(
+ self, m_detach, m_report_ready, m_dhcp, m_fallback_if,
+ m_writefile):
+ """Report ready first and then wait for nic detach"""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+ dsa._wait_for_all_nics_ready()
+ m_fallback_if.return_value = "Dummy interface"
+ self.assertEqual(1, m_report_ready.call_count)
+ self.assertEqual(1, m_detach.call_count)
+ self.assertEqual(1, m_writefile.call_count)
+ self.assertEqual(1, m_dhcp.call_count)
+ m_writefile.assert_called_with(dsaz.REPORTED_READY_MARKER_FILE,
+ mock.ANY)
+
+ @mock.patch('os.path.isfile')
+ @mock.patch(MOCKPATH + 'DataSourceAzure.fallback_interface')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._report_ready')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach')
+ def test_detect_nic_attach_skips_report_ready_when_marker_present(
+ self, m_detach, m_report_ready, m_dhcp, m_fallback_if, m_isfile):
+ """Skip reporting ready if we already have a marker file."""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+
+ def isfile(key):
+ return key == dsaz.REPORTED_READY_MARKER_FILE
+
+ m_isfile.side_effect = isfile
+ dsa._wait_for_all_nics_ready()
+ m_fallback_if.return_value = "Dummy interface"
+ self.assertEqual(0, m_report_ready.call_count)
+ self.assertEqual(0, m_dhcp.call_count)
+ self.assertEqual(1, m_detach.call_count)
+
+ @mock.patch('os.path.isfile')
+ @mock.patch(MOCKPATH + 'DataSourceAzure.fallback_interface')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4WithReporting')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._report_ready')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach')
+ def test_detect_nic_attach_skips_nic_detach_when_marker_present(
+ self, m_detach, m_report_ready, m_dhcp, m_fallback_if, m_isfile):
+ """Skip wait for nic detach if it already happened."""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+
+ m_isfile.return_value = True
+ dsa._wait_for_all_nics_ready()
+ m_fallback_if.return_value = "Dummy interface"
+ self.assertEqual(0, m_report_ready.call_count)
+ self.assertEqual(0, m_dhcp.call_count)
+ self.assertEqual(0, m_detach.call_count)
+
+ @mock.patch(MOCKPATH + 'DataSourceAzure.wait_for_link_up', autospec=True)
+ @mock.patch('cloudinit.sources.helpers.netlink.wait_for_nic_attach_event')
+ @mock.patch('cloudinit.sources.net.find_fallback_nic')
+ @mock.patch(MOCKPATH + 'get_metadata_from_imds')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach')
+ @mock.patch('os.path.isfile')
+ def test_wait_for_nic_attach_if_no_fallback_interface(
+ self, m_isfile, m_detach, m_dhcpv4, m_imds, m_fallback_if,
+ m_attach, m_link_up):
+ """Wait for nic attach if we do not have a fallback interface"""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+ lease = {
+ 'interface': 'eth9', 'fixed-address': '192.168.2.9',
+ 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0',
+ 'unknown-245': '624c3620'}
+
+ m_isfile.return_value = True
+ m_attach.return_value = "eth0"
+ dhcp_ctx = mock.MagicMock(lease=lease)
+ dhcp_ctx.obtain_lease.return_value = lease
+ m_dhcpv4.return_value = dhcp_ctx
+ m_imds.return_value = IMDS_NETWORK_METADATA
+ m_fallback_if.return_value = None
+
+ dsa._wait_for_all_nics_ready()
+
+ self.assertEqual(0, m_detach.call_count)
+ self.assertEqual(1, m_attach.call_count)
+ self.assertEqual(1, m_dhcpv4.call_count)
+ self.assertEqual(1, m_imds.call_count)
+ self.assertEqual(1, m_link_up.call_count)
+ m_link_up.assert_called_with(mock.ANY, "eth0")
+
+ @mock.patch(MOCKPATH + 'DataSourceAzure.wait_for_link_up')
+ @mock.patch('cloudinit.sources.helpers.netlink.wait_for_nic_attach_event')
+ @mock.patch('cloudinit.sources.net.find_fallback_nic')
+ @mock.patch(MOCKPATH + 'get_metadata_from_imds')
+ @mock.patch(MOCKPATH + 'EphemeralDHCPv4')
+ @mock.patch(MOCKPATH + 'DataSourceAzure._wait_for_nic_detach')
+ @mock.patch('os.path.isfile')
+ def test_wait_for_nic_attach_multinic_attach(
+ self, m_isfile, m_detach, m_dhcpv4, m_imds, m_fallback_if,
+ m_attach, m_link_up):
+ """Wait for nic attach if we do not have a fallback interface"""
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+ lease = {
+ 'interface': 'eth9', 'fixed-address': '192.168.2.9',
+ 'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0',
+ 'unknown-245': '624c3620'}
+ m_attach_call_count = 0
+
+ def nic_attach_ret(nl_sock, nics_found):
+ nonlocal m_attach_call_count
+ if m_attach_call_count == 0:
+ m_attach_call_count = m_attach_call_count + 1
+ return "eth0"
+ return "eth1"
+
+ def network_metadata_ret(ifname, retries, type):
+ # Simulate two NICs by adding the same one twice.
+ md = IMDS_NETWORK_METADATA
+ md['interface'].append(md['interface'][0])
+ if ifname == "eth0":
+ return md
+ raise requests.Timeout('Fake connection timeout')
+
+ m_isfile.return_value = True
+ m_attach.side_effect = nic_attach_ret
+ dhcp_ctx = mock.MagicMock(lease=lease)
+ dhcp_ctx.obtain_lease.return_value = lease
+ m_dhcpv4.return_value = dhcp_ctx
+ m_imds.side_effect = network_metadata_ret
+ m_fallback_if.return_value = None
+
+ dsa._wait_for_all_nics_ready()
+
+ self.assertEqual(0, m_detach.call_count)
+ self.assertEqual(2, m_attach.call_count)
+ # DHCP and network metadata calls will only happen on the primary NIC.
+ self.assertEqual(1, m_dhcpv4.call_count)
+ self.assertEqual(1, m_imds.call_count)
+ self.assertEqual(2, m_link_up.call_count)
+
+ @mock.patch('cloudinit.distros.networking.LinuxNetworking.try_set_link_up')
+ def test_wait_for_link_up_returns_if_already_up(
+ self, m_is_link_up):
+ """Waiting for link to be up should return immediately if the link is
+ already up."""
+
+ distro_cls = distros.fetch('ubuntu')
+ distro = distro_cls('ubuntu', {}, self.paths)
+ dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
+ m_is_link_up.return_value = True
+
+ dsa.wait_for_link_up("eth0")
+ self.assertEqual(1, m_is_link_up.call_count)
+
+ @mock.patch(MOCKPATH + 'util.write_file')
+ @mock.patch('cloudinit.net.read_sys_net')
+ @mock.patch('cloudinit.distros.networking.LinuxNetworking.try_set_link_up')
+ def test_wait_for_link_up_writes_to_device_file(
+ self, m_is_link_up, m_read_sys_net, m_writefile):
+ """Waiting for link to be up should return immediately if the link is
+ already up."""
+
+ distro_cls = distros.fetch('ubuntu')
+ distro = distro_cls('ubuntu', {}, self.paths)
+ dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
+
+ callcount = 0
+
+ def linkup(key):
+ nonlocal callcount
+ if callcount == 0:
+ callcount += 1
+ return False
+ return True
+
+ m_is_link_up.side_effect = linkup
+
+ dsa.wait_for_link_up("eth0")
+ self.assertEqual(2, m_is_link_up.call_count)
+ self.assertEqual(1, m_read_sys_net.call_count)
+ self.assertEqual(2, m_writefile.call_count)
+
+ @mock.patch('cloudinit.sources.helpers.netlink.'
+ 'create_bound_netlink_socket')
+ def test_wait_for_all_nics_ready_raises_if_socket_fails(self, m_socket):
+ """Waiting for all nics should raise exception if netlink socket
+ creation fails."""
+
+ m_socket.side_effect = netlink.NetlinkCreateSocketError
+ distro_cls = distros.fetch('ubuntu')
+ distro = distro_cls('ubuntu', {}, self.paths)
+ dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
+
+ self.assertRaises(netlink.NetlinkCreateSocketError,
+ dsa._wait_for_all_nics_ready)
+ # dsa._wait_for_all_nics_ready()
+
+
@mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network')
@mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery')
@mock.patch('cloudinit.sources.helpers.netlink.'
@@ -2330,6 +2720,24 @@ class TestPreprovisioningPollIMDS(CiTestCase):
self.assertEqual(3, m_dhcpv4.call_count, 'Expected 3 DHCP calls')
self.assertEqual(4, self.tries, 'Expected 4 total reads from IMDS')
+ @mock.patch('os.path.isfile')
+ def test_poll_imds_skips_dhcp_if_ctx_present(
+ self, m_isfile, report_ready_func, fake_resp, m_media_switch,
+ m_dhcp, m_net):
+ """The poll_imds function should reuse the dhcp ctx if it is already
+ present. This happens when we wait for nic to be hot-attached before
+ polling for reprovisiondata. Note that if this ctx is set when
+ _poll_imds is called, then it is not expected to be waiting for
+ media_disconnect_connect either."""
+ report_file = self.tmp_path('report_marker', self.tmp)
+ m_isfile.return_value = True
+ dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths)
+ dsa._ephemeral_dhcp_ctx = "Dummy dhcp ctx"
+ with mock.patch(MOCKPATH + 'REPORTED_READY_MARKER_FILE', report_file):
+ dsa._poll_imds()
+ self.assertEqual(0, m_dhcp.call_count)
+ self.assertEqual(0, m_media_switch.call_count)
+
def test_does_not_poll_imds_report_ready_when_marker_file_exists(
self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net):
"""poll_imds should not call report ready when the reported ready
@@ -2390,7 +2798,7 @@ class TestPreprovisioningPollIMDS(CiTestCase):
@mock.patch(MOCKPATH + 'util.is_FreeBSD')
@mock.patch('cloudinit.sources.helpers.netlink.'
'wait_for_media_disconnect_connect')
-@mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network')
+@mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network', autospec=True)
@mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery')
@mock.patch('requests.Session.request')
class TestAzureDataSourcePreprovisioning(CiTestCase):
@@ -2412,7 +2820,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
m_dhcp.return_value = [{
'interface': 'eth9', 'fixed-address': '192.168.2.9',
'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0'}]
- url = 'http://{0}/metadata/reprovisiondata?api-version=2017-04-02'
+ url = 'http://{0}/metadata/reprovisiondata?api-version=2019-06-01'
host = "169.254.169.254"
full_url = url.format(host)
m_request.return_value = mock.MagicMock(status_code=200, text="ovf",
@@ -2445,7 +2853,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
'interface': 'eth9', 'fixed-address': '192.168.2.9',
'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0',
'unknown-245': '624c3620'}]
- url = 'http://{0}/metadata/reprovisiondata?api-version=2017-04-02'
+ url = 'http://{0}/metadata/reprovisiondata?api-version=2019-06-01'
host = "169.254.169.254"
full_url = url.format(host)
hostname = "myhost"
diff --git a/tools/.github-cla-signers b/tools/.github-cla-signers
index 9b594a44..1e0c3ea4 100644
--- a/tools/.github-cla-signers
+++ b/tools/.github-cla-signers
@@ -1,6 +1,7 @@
ader1990
AlexBaranowski
Aman306
+aswinrajamannar
beezly
bipinbachhao
BirknerAlex