From 9e904bbc3336b96475bfd00fb3bf1262ae4de49f Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 17 Nov 2016 14:46:23 -0500 Subject: Add activate_datasource, for datasource specific code paths. This adds a call to 'activate_datasource'. That will be called during init stage (or init-local in the event of a 'local' dsmode). It is present so that the datasource can do platform specific operations that may be necessary. It is passed the fully rendered cloud-config and whether or not the instance is a new instance. The Azure datasource uses this to address formatting of the ephemeral devices. It does so by a.) waiting for the device to come online b.) removing the marker files for the disk_setup and mounts modules if it finds that the ephemeral device has been reset. LP: #1611074 --- cloudinit/sources/DataSourceAzure.py | 199 ++++++++++++++++++----------------- 1 file changed, 104 insertions(+), 95 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index b802b03e..22f9004a 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -19,7 +19,6 @@ import base64 import contextlib import crypt -import fnmatch from functools import partial import os import os.path @@ -28,7 +27,6 @@ from xml.dom import minidom import xml.etree.ElementTree as ET from cloudinit import log as logging -from cloudinit.settings import PER_ALWAYS from cloudinit import sources from cloudinit.sources.helpers.azure import get_metadata_from_fabric from cloudinit import util @@ -42,6 +40,9 @@ BOUNCE_COMMAND = [ 'sh', '-xc', "i=$interface; x=0; ifdown $i || x=$?; ifup $i || x=$?; exit $x" ] +# azure systems will always have a resource disk, and 66-azure-ephemeral.rules +# ensures that it gets linked to this path. 
+RESOURCE_DISK_PATH = '/dev/disk/cloud/azure_resource' BUILTIN_DS_CONFIG = { 'agent_command': AGENT_START, @@ -53,7 +54,7 @@ BUILTIN_DS_CONFIG = { 'command': BOUNCE_COMMAND, 'hostname_command': 'hostname', }, - 'disk_aliases': {'ephemeral0': '/dev/sdb'}, + 'disk_aliases': {'ephemeral0': RESOURCE_DISK_PATH}, 'dhclient_lease_file': '/var/lib/dhcp/dhclient.eth0.leases', } @@ -245,15 +246,6 @@ class DataSourceAzureNet(sources.DataSource): self.metadata['instance-id'] = util.read_dmi_data('system-uuid') self.metadata.update(fabric_data) - found_ephemeral = find_fabric_formatted_ephemeral_disk() - if found_ephemeral: - self.ds_cfg['disk_aliases']['ephemeral0'] = found_ephemeral - LOG.debug("using detected ephemeral0 of %s", found_ephemeral) - - cc_modules_override = support_new_ephemeral(self.sys_cfg) - if cc_modules_override: - self.cfg['cloud_init_modules'] = cc_modules_override - return True def device_name_to_device(self, name): @@ -266,97 +258,104 @@ class DataSourceAzureNet(sources.DataSource): # quickly (local check only) if self.instance_id is still valid return sources.instance_id_matches_system_uuid(self.get_instance_id()) - -def count_files(mp): - return len(fnmatch.filter(os.listdir(mp), '*[!cdrom]*')) + def activate(self, cfg, is_new_instance): + address_ephemeral_resize(is_new_instance=is_new_instance) + return -def find_fabric_formatted_ephemeral_part(): - """ - Locate the first fabric formatted ephemeral device. - """ - potential_locations = ['/dev/disk/cloud/azure_resource-part1', - '/dev/disk/azure/resource-part1'] - device_location = None - for potential_location in potential_locations: - if os.path.exists(potential_location): - device_location = potential_location +def can_dev_be_reformatted(devpath): + # determine if the ephemeral block device path devpath + # is newly formatted after a resize. 
+ if not os.path.exists(devpath): + return False, 'device %s does not exist' % devpath + + realpath = os.path.realpath(devpath) + LOG.debug('Resolving realpath of %s -> %s', devpath, realpath) + + # it is possible that the block device might exist, but the kernel + # have not yet read the partition table and sent events. we udevadm settle + # to hope to resolve that. Better here would probably be to test and see, + # and then settle if we didn't find anything and try again. + if util.which("udevadm"): + util.subp(["udevadm", "settle"]) + + # devpath of /dev/sd[a-z] or /dev/disk/cloud/azure_resource + # where partitions are "1" or "-part1" or "p1" + part1path = None + for suff in ("-part", "p", ""): + cand = devpath + suff + "1" + if os.path.exists(cand): + if os.path.exists(devpath + suff + "2"): + msg = ('device %s had more than 1 partition: %s, %s' % + (devpath, cand, devpath + suff + "2")) + return False, msg + part1path = cand break - if device_location is None: - LOG.debug("no azure resource disk partition path found") - return None - ntfs_devices = util.find_devs_with("TYPE=ntfs") - real_device = os.path.realpath(device_location) - if real_device in ntfs_devices: - return device_location - LOG.debug("'%s' existed (%s) but was not ntfs formated", - device_location, real_device) - return None - - -def find_fabric_formatted_ephemeral_disk(): - """ - Get the ephemeral disk. - """ - part_dev = find_fabric_formatted_ephemeral_part() - if part_dev: - return part_dev.split('-')[0] - return None + if part1path is None: + return False, 'device %s was not partitioned' % devpath -def support_new_ephemeral(cfg): - """ - Windows Azure makes ephemeral devices ephemeral to boot; a ephemeral device - may be presented as a fresh device, or not. 
+ real_part1path = os.path.realpath(part1path) + ntfs_devices = util.find_devs_with("TYPE=ntfs", no_cache=True) + LOG.debug('ntfs_devices found = %s', ntfs_devices) + if real_part1path not in ntfs_devices: + msg = ('partition 1 (%s -> %s) on device %s was not ntfs formatted' % + (part1path, real_part1path, devpath)) + return False, msg - Since the knowledge of when a disk is supposed to be plowed under is - specific to Windows Azure, the logic resides here in the datasource. When a - new ephemeral device is detected, cloud-init overrides the default - frequency for both disk-setup and mounts for the current boot only. - """ - device = find_fabric_formatted_ephemeral_part() - if not device: - LOG.debug("no default fabric formated ephemeral0.1 found") - return None - LOG.debug("fabric formated ephemeral0.1 device at %s", device) + def count_files(mp): + ignored = {'dataloss_warning_readme.txt'} + return len([f for f in os.listdir(mp) if f.lower() not in ignored]) - file_count = 0 + bmsg = ('partition 1 (%s -> %s) on device %s was ntfs formatted' % + (part1path, real_part1path, devpath)) try: - file_count = util.mount_cb(device, count_files) - except Exception: - return None - LOG.debug("fabric prepared ephmeral0.1 has %s files on it", file_count) - - if file_count >= 1: - LOG.debug("fabric prepared ephemeral0.1 will be preserved") - return None + file_count = util.mount_cb(part1path, count_files) + except util.MountFailedError as e: + return False, bmsg + ' but mount of %s failed: %s' % (part1path, e) + + if file_count != 0: + return False, bmsg + ' but had %d files on it.' % file_count + + return True, bmsg + ' and had no important files. Safe for reformatting.' 
+ + +def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120, + is_new_instance=False): + # wait for ephemeral disk to come up + naplen = .2 + missing = wait_for_files([devpath], maxwait=maxwait, naplen=naplen, + log_pre="Azure ephemeral disk: ") + + if missing: + LOG.warn("ephemeral device '%s' did not appear after %d seconds.", + devpath, maxwait) + return + + result = False + msg = None + if is_new_instance: + result, msg = (True, "First instance boot.") else: - # if device was already mounted, then we need to unmount it - # race conditions could allow for a check-then-unmount - # to have a false positive. so just unmount and then check. - try: - util.subp(['umount', device]) - except util.ProcessExecutionError as e: - if device in util.mounts(): - LOG.warn("Failed to unmount %s, will not reformat.", device) - LOG.debug("Failed umount: %s", e) - return None - - LOG.debug("cloud-init will format ephemeral0.1 this boot.") - LOG.debug("setting disk_setup and mounts modules 'always' for this boot") - - cc_modules = cfg.get('cloud_init_modules') - if not cc_modules: - return None - - mod_list = [] - for mod in cc_modules: - if mod in ("disk_setup", "mounts"): - mod_list.append([mod, PER_ALWAYS]) - LOG.debug("set module '%s' to 'always' for this boot", mod) + result, msg = can_dev_be_reformatted(devpath) + + LOG.debug("reformattable=%s: %s" % (result, msg)) + if not result: + return + + for mod in ['disk_setup', 'mounts']: + sempath = '/var/lib/cloud/instance/sem/config_' + mod + bmsg = 'Marker "%s" for module "%s"' % (sempath, mod) + if os.path.exists(sempath): + try: + os.unlink(sempath) + LOG.debug(bmsg + " removed.") + except Exception as e: + # python3 throws FileNotFoundError, python2 throws OSError + LOG.warn(bmsg + ": remove failed! 
(%s)" % e) else: - mod_list.append(mod) - return mod_list + LOG.debug(bmsg + " did not exist.") + return def perform_hostname_bounce(hostname, cfg, prev_hostname): @@ -408,15 +407,25 @@ def pubkeys_from_crt_files(flist): return pubkeys -def wait_for_files(flist, maxwait=60, naplen=.5): +def wait_for_files(flist, maxwait=60, naplen=.5, log_pre=""): need = set(flist) waited = 0 - while waited < maxwait: + while True: need -= set([f for f in need if os.path.exists(f)]) if len(need) == 0: + LOG.debug("%sAll files appeared after %s seconds: %s", + log_pre, waited, flist) return [] + if waited == 0: + LOG.info("%sWaiting up to %s seconds for the following files: %s", + log_pre, maxwait, flist) + if waited + naplen > maxwait: + break time.sleep(naplen) waited += naplen + + LOG.warn("%sStill missing files after %s seconds: %s", + log_pre, maxwait, need) return need -- cgit v1.2.3