diff options
author | Douglas Jordan <dojordan@microsoft.com> | 2018-01-24 16:10:08 -0700 |
---|---|---|
committer | Chad Smith <chad.smith@canonical.com> | 2018-01-24 16:10:08 -0700 |
commit | c03bdd3d8ed762cada813c5e95a40b14d2047b57 (patch) | |
tree | 708422bb64a8804f649ad7558d5088f9e11011a4 /cloudinit/sources | |
parent | 30597f28512fafbe25486df5865b628d859486c6 (diff) | |
download | vyos-cloud-init-c03bdd3d8ed762cada813c5e95a40b14d2047b57.tar.gz vyos-cloud-init-c03bdd3d8ed762cada813c5e95a40b14d2047b57.zip |
Azure VM Preprovisioning support.
This change will enable azure vms to report provisioning has completed
twice, first to tell the fabric it has completed then a second time to
enable customer settings. The datasource for the second provisioning is
the Instance Metadata Service (IMDS),and the VM will poll indefinitely for
the new ovf-env.xml from IMDS.
This branch introduces EphemeralDHCPv4 which encapsulates common logic
used by both DataSourceEc2 an DataSourceAzure for temporary DHCP
interactions without side-effects.
LP: #1734991
Diffstat (limited to 'cloudinit/sources')
-rw-r--r-- | cloudinit/sources/DataSourceAzure.py | 138 | ||||
-rw-r--r-- | cloudinit/sources/DataSourceEc2.py | 23 | ||||
-rw-r--r-- | cloudinit/sources/helpers/azure.py | 22 |
3 files changed, 150 insertions, 33 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index d1d09757..4bcbf3a4 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -11,13 +11,16 @@ from functools import partial import os import os.path import re +from time import time from xml.dom import minidom import xml.etree.ElementTree as ET from cloudinit import log as logging from cloudinit import net +from cloudinit.net.dhcp import EphemeralDHCPv4 from cloudinit import sources from cloudinit.sources.helpers.azure import get_metadata_from_fabric +from cloudinit.url_helper import readurl, wait_for_url, UrlError from cloudinit import util LOG = logging.getLogger(__name__) @@ -44,6 +47,9 @@ LEASE_FILE = '/var/lib/dhcp/dhclient.eth0.leases' DEFAULT_FS = 'ext4' # DMI chassis-asset-tag is set static for all azure instances AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77' +REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds" +IMDS_URL = "http://169.254.169.254/metadata/reprovisiondata" +IMDS_RETRIES = 5 def find_storvscid_from_sysctl_pnpinfo(sysctl_out, deviceid): @@ -276,19 +282,20 @@ class DataSourceAzure(sources.DataSource): with temporary_hostname(azure_hostname, self.ds_cfg, hostname_command=hostname_command) \ - as previous_hostname: - if (previous_hostname is not None and + as previous_hn: + if (previous_hn is not None and util.is_true(self.ds_cfg.get('set_hostname'))): cfg = self.ds_cfg['hostname_bounce'] # "Bouncing" the network try: - perform_hostname_bounce(hostname=azure_hostname, - cfg=cfg, - prev_hostname=previous_hostname) + return perform_hostname_bounce(hostname=azure_hostname, + cfg=cfg, + prev_hostname=previous_hn) except Exception as e: LOG.warning("Failed publishing hostname: %s", e) util.logexc(LOG, "handling set_hostname failed") + return False def get_metadata_from_agent(self): temp_hostname = self.metadata.get('local-hostname') @@ -345,15 +352,20 @@ class DataSourceAzure(sources.DataSource): ddir = self.ds_cfg['data_dir'] candidates = [self.seed_dir] + if os.path.isfile(REPROVISION_MARKER_FILE): + candidates.insert(0, "IMDS") candidates.extend(list_possible_azure_ds_devs()) if ddir: candidates.append(ddir) found = None - + reprovision = False for cdev in candidates: try: - if cdev.startswith("/dev/"): + if cdev == "IMDS": + ret = None + reprovision = True + elif cdev.startswith("/dev/"): if util.is_FreeBSD(): ret = util.mount_cb(cdev, load_azure_ds_dir, mtype="udf", sync=False) @@ -370,6 +382,8 @@ class DataSourceAzure(sources.DataSource): LOG.warning("%s was not mountable", cdev) continue + if reprovision or self._should_reprovision(ret): + ret = self._reprovision() (md, self.userdata_raw, cfg, files) = ret self.seed = cdev self.metadata = util.mergemanydict([md, DEFAULT_METADATA]) @@ -428,6 +442,83 @@ class DataSourceAzure(sources.DataSource): LOG.debug("negotiating already done for %s", self.get_instance_id()) + def _poll_imds(self, report_ready=True): + """Poll IMDS for the new provisioning data until we get a valid + response. Then return the returned JSON object.""" + url = IMDS_URL + "?api-version=2017-04-02" + headers = {"Metadata": "true"} + LOG.debug("Start polling IMDS") + + def sleep_cb(response, loop_n): + return 1 + + def exception_cb(msg, exception): + if isinstance(exception, UrlError) and exception.code == 404: + return + LOG.warning("Exception during polling. Will try DHCP.", + exc_info=True) + + # If we get an exception while trying to call IMDS, we + # call DHCP and setup the ephemeral network to acquire the new IP. + raise exception + + need_report = report_ready + for i in range(IMDS_RETRIES): + try: + with EphemeralDHCPv4() as lease: + if need_report: + self._report_ready(lease=lease) + need_report = False + wait_for_url([url], max_wait=None, timeout=60, + status_cb=LOG.info, + headers_cb=lambda url: headers, sleep_time=1, + exception_cb=exception_cb, + sleep_time_cb=sleep_cb) + return str(readurl(url, headers=headers)) + except Exception: + LOG.debug("Exception during polling-retrying dhcp" + + " %d more time(s).", (IMDS_RETRIES - i), + exc_info=True) + + def _report_ready(self, lease): + """Tells the fabric provisioning has completed + before we go into our polling loop.""" + try: + get_metadata_from_fabric(None, lease['unknown-245']) + except Exception as exc: + LOG.warning( + "Error communicating with Azure fabric; You may experience." + "connectivity issues.", exc_info=True) + + def _should_reprovision(self, ret): + """Whether or not we should poll IMDS for reprovisioning data. + Also sets a marker file to poll IMDS. + + The marker file is used for the following scenario: the VM boots into + this polling loop, which we expect to be proceeding infinitely until + the VM is picked. If for whatever reason the platform moves us to a + new host (for instance a hardware issue), we need to keep polling. + However, since the VM reports ready to the Fabric, we will not attach + the ISO, thus cloud-init needs to have a way of knowing that it should + jump back into the polling loop in order to retrieve the ovf_env.""" + if not ret: + return False + (md, self.userdata_raw, cfg, files) = ret + path = REPROVISION_MARKER_FILE + if (cfg.get('PreprovisionedVm') is True or + os.path.isfile(path)): + if not os.path.isfile(path): + LOG.info("Creating a marker file to poll imds") + util.write_file(path, "%s: %s\n" % (os.getpid(), time())) + return True + return False + + def _reprovision(self): + """Initiate the reprovisioning workflow.""" + contents = self._poll_imds() + md, ud, cfg = read_azure_ovf(contents) + return (md, ud, cfg, {'ovf-env.xml': contents}) + def _negotiate(self): """Negotiate with fabric and return data from it. @@ -453,7 +544,7 @@ class DataSourceAzure(sources.DataSource): "Error communicating with Azure fabric; You may experience." "connectivity issues.", exc_info=True) return False - + util.del_file(REPROVISION_MARKER_FILE) return fabric_data def activate(self, cfg, is_new_instance): @@ -595,6 +686,7 @@ def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120, def perform_hostname_bounce(hostname, cfg, prev_hostname): # set the hostname to 'hostname' if it is not already set to that. # then, if policy is not off, bounce the interface using command + # Returns True if the network was bounced, False otherwise. command = cfg['command'] interface = cfg['interface'] policy = cfg['policy'] @@ -614,7 +706,8 @@ def perform_hostname_bounce(hostname, cfg, prev_hostname): else: LOG.debug( "Skipping network bounce: ifupdown utils aren't present.") - return # Don't bounce as networkd handles hostname DDNS updates + # Don't bounce as networkd handles hostname DDNS updates + return False LOG.debug("pubhname: publishing hostname [%s]", msg) shell = not isinstance(command, (list, tuple)) # capture=False, see comments in bug 1202758 and bug 1206164. @@ -622,6 +715,7 @@ def perform_hostname_bounce(hostname, cfg, prev_hostname): get_uptime=True, func=util.subp, kwargs={'args': command, 'shell': shell, 'capture': False, 'env': env}) + return True def crtfile_to_pubkey(fname, data=None): @@ -838,9 +932,35 @@ def read_azure_ovf(contents): if 'ssh_pwauth' not in cfg and password: cfg['ssh_pwauth'] = True + cfg['PreprovisionedVm'] = _extract_preprovisioned_vm_setting(dom) + return (md, ud, cfg) +def _extract_preprovisioned_vm_setting(dom): + """Read the preprovision flag from the ovf. It should not + exist unless true.""" + platform_settings_section = find_child( + dom.documentElement, + lambda n: n.localName == "PlatformSettingsSection") + if not platform_settings_section or len(platform_settings_section) == 0: + LOG.debug("PlatformSettingsSection not found") + return False + platform_settings = find_child( + platform_settings_section[0], + lambda n: n.localName == "PlatformSettings") + if not platform_settings or len(platform_settings) == 0: + LOG.debug("PlatformSettings not found") + return False + preprovisionedVm = find_child( + platform_settings[0], + lambda n: n.localName == "PreprovisionedVm") + if not preprovisionedVm or len(preprovisionedVm) == 0: + LOG.debug("PreprovisionedVm not found") + return False + return util.translate_bool(preprovisionedVm[0].firstChild.nodeValue) + + def encrypt_pass(password, salt_id="$6$"): return crypt.crypt(password, salt_id + util.rand_str(strlen=16)) diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 0f89f34d..e14553b3 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -14,7 +14,7 @@ import time from cloudinit import ec2_utils as ec2 from cloudinit import log as logging from cloudinit import net -from cloudinit.net import dhcp +from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError from cloudinit import sources from cloudinit import url_helper as uhelp from cloudinit import util @@ -102,22 +102,13 @@ class DataSourceEc2(sources.DataSource): if util.is_FreeBSD(): LOG.debug("FreeBSD doesn't support running dhclient with -sf") return False - dhcp_leases = dhcp.maybe_perform_dhcp_discovery( - self.fallback_interface) - if not dhcp_leases: - # DataSourceEc2Local failed in init-local stage. DataSourceEc2 - # will still run in init-network stage. + try: + with EphemeralDHCPv4(self.fallback_interface): + return util.log_time( + logfunc=LOG.debug, msg='Crawl of metadata service', + func=self._crawl_metadata) + except NoDHCPLeaseError: return False - dhcp_opts = dhcp_leases[-1] - net_params = {'interface': dhcp_opts.get('interface'), - 'ip': dhcp_opts.get('fixed-address'), - 'prefix_or_mask': dhcp_opts.get('subnet-mask'), - 'broadcast': dhcp_opts.get('broadcast-address'), - 'router': dhcp_opts.get('routers')} - with net.EphemeralIPv4Network(**net_params): - return util.log_time( - logfunc=LOG.debug, msg='Crawl of metadata service', - func=self._crawl_metadata) else: return self._crawl_metadata() diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index 6cda5721..90c12df1 100644 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -199,10 +199,10 @@ class WALinuxAgentShim(object): ' </Container>', '</Health>']) - def __init__(self, fallback_lease_file=None): + def __init__(self, fallback_lease_file=None, dhcp_options=None): LOG.debug('WALinuxAgentShim instantiated, fallback_lease_file=%s', fallback_lease_file) - self.dhcpoptions = None + self.dhcpoptions = dhcp_options self._endpoint = None self.openssl_manager = None self.values = {} @@ -220,7 +220,8 @@ class WALinuxAgentShim(object): @property def endpoint(self): if self._endpoint is None: - self._endpoint = self.find_endpoint(self.lease_file) + self._endpoint = self.find_endpoint(self.lease_file, + self.dhcpoptions) return self._endpoint @staticmethod @@ -292,10 +293,14 @@ class WALinuxAgentShim(object): return _value @staticmethod - def find_endpoint(fallback_lease_file=None): + def find_endpoint(fallback_lease_file=None, dhcp245=None): value = None - LOG.debug('Finding Azure endpoint from networkd...') - value = WALinuxAgentShim._networkd_get_value_from_leases() + if dhcp245 is not None: + value = dhcp245 + LOG.debug("Using Azure Endpoint from dhcp options") + if value is None: + LOG.debug('Finding Azure endpoint from networkd...') + value = WALinuxAgentShim._networkd_get_value_from_leases() if value is None: # Option-245 stored in /run/cloud-init/dhclient.hooks/<ifc>.json # a dhclient exit hook that calls cloud-init-dhclient-hook @@ -367,8 +372,9 @@ class WALinuxAgentShim(object): LOG.info('Reported ready to Azure fabric.') -def get_metadata_from_fabric(fallback_lease_file=None): - shim = WALinuxAgentShim(fallback_lease_file=fallback_lease_file) +def get_metadata_from_fabric(fallback_lease_file=None, dhcp_opts=None): + shim = WALinuxAgentShim(fallback_lease_file=fallback_lease_file, + dhcp_options=dhcp_opts) try: return shim.register_with_azure_and_fetch_data() finally: |