diff options
Diffstat (limited to 'cloudinit')
-rw-r--r-- | cloudinit/cmd/main.py | 3 | ||||
-rw-r--r-- | cloudinit/config/cc_mounts.py | 12 | ||||
-rw-r--r-- | cloudinit/sources/DataSourceAzure.py | 199 | ||||
-rw-r--r-- | cloudinit/sources/__init__.py | 12 | ||||
-rw-r--r-- | cloudinit/stages.py | 7 |
5 files changed, 135 insertions, 98 deletions
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 83eb02c9..fe37075e 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -326,6 +326,9 @@ def main_init(name, args): util.logexc(LOG, "Failed to re-adjust output redirection!") logging.setupLogging(mods.cfg) + # give the activated datasource a chance to adjust + init.activate_datasource() + # Stage 10 return (init.datasource, run_module_section(mods, name, name)) diff --git a/cloudinit/config/cc_mounts.py b/cloudinit/config/cc_mounts.py index dfc4b598..452c9e83 100644 --- a/cloudinit/config/cc_mounts.py +++ b/cloudinit/config/cc_mounts.py @@ -312,7 +312,8 @@ def handle_swapcfg(swapcfg): def handle(_name, cfg, cloud, log, _args): # fs_spec, fs_file, fs_vfstype, fs_mntops, fs-freq, fs_passno def_mnt_opts = "defaults,nobootwait" - if cloud.distro.uses_systemd(): + uses_systemd = cloud.distro.uses_systemd() + if uses_systemd: def_mnt_opts = "defaults,nofail,x-systemd.requires=cloud-init.service" defvals = [None, None, "auto", def_mnt_opts, "0", "2"] @@ -447,7 +448,12 @@ def handle(_name, cfg, cloud, log, _args): except Exception: util.logexc(log, "Failed to make '%s' config-mount", d) + activate_cmd = ["mount", "-a"] + if uses_systemd: + activate_cmd = ["systemctl", "daemon-reload"] + fmt = "Activate mounts: %s:" + ' '.join(activate_cmd) try: - util.subp(("mount", "-a")) + util.subp(activate_cmd) + LOG.debug(fmt, "PASS") except util.ProcessExecutionError: - util.logexc(log, "Activating mounts via 'mount -a' failed") + util.logexc(log, fmt, "FAIL") diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index b802b03e..22f9004a 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -19,7 +19,6 @@ import base64 import contextlib import crypt -import fnmatch from functools import partial import os import os.path @@ -28,7 +27,6 @@ from xml.dom import minidom import xml.etree.ElementTree as ET from cloudinit import log as 
logging -from cloudinit.settings import PER_ALWAYS from cloudinit import sources from cloudinit.sources.helpers.azure import get_metadata_from_fabric from cloudinit import util @@ -42,6 +40,9 @@ BOUNCE_COMMAND = [ 'sh', '-xc', "i=$interface; x=0; ifdown $i || x=$?; ifup $i || x=$?; exit $x" ] +# azure systems will always have a resource disk, and 66-azure-ephemeral.rules +# ensures that it gets linked to this path. +RESOURCE_DISK_PATH = '/dev/disk/cloud/azure_resource' BUILTIN_DS_CONFIG = { 'agent_command': AGENT_START, @@ -53,7 +54,7 @@ BUILTIN_DS_CONFIG = { 'command': BOUNCE_COMMAND, 'hostname_command': 'hostname', }, - 'disk_aliases': {'ephemeral0': '/dev/sdb'}, + 'disk_aliases': {'ephemeral0': RESOURCE_DISK_PATH}, 'dhclient_lease_file': '/var/lib/dhcp/dhclient.eth0.leases', } @@ -245,15 +246,6 @@ class DataSourceAzureNet(sources.DataSource): self.metadata['instance-id'] = util.read_dmi_data('system-uuid') self.metadata.update(fabric_data) - found_ephemeral = find_fabric_formatted_ephemeral_disk() - if found_ephemeral: - self.ds_cfg['disk_aliases']['ephemeral0'] = found_ephemeral - LOG.debug("using detected ephemeral0 of %s", found_ephemeral) - - cc_modules_override = support_new_ephemeral(self.sys_cfg) - if cc_modules_override: - self.cfg['cloud_init_modules'] = cc_modules_override - return True def device_name_to_device(self, name): @@ -266,97 +258,104 @@ class DataSourceAzureNet(sources.DataSource): # quickly (local check only) if self.instance_id is still valid return sources.instance_id_matches_system_uuid(self.get_instance_id()) - -def count_files(mp): - return len(fnmatch.filter(os.listdir(mp), '*[!cdrom]*')) + def activate(self, cfg, is_new_instance): + address_ephemeral_resize(is_new_instance=is_new_instance) + return -def find_fabric_formatted_ephemeral_part(): - """ - Locate the first fabric formatted ephemeral device. 
- """ - potential_locations = ['/dev/disk/cloud/azure_resource-part1', - '/dev/disk/azure/resource-part1'] - device_location = None - for potential_location in potential_locations: - if os.path.exists(potential_location): - device_location = potential_location +def can_dev_be_reformatted(devpath): + # determine if the ephemeral block device path devpath + # is newly formatted after a resize. + if not os.path.exists(devpath): + return False, 'device %s does not exist' % devpath + + realpath = os.path.realpath(devpath) + LOG.debug('Resolving realpath of %s -> %s', devpath, realpath) + + # it is possible that the block device might exist, but the kernel + # have not yet read the partition table and sent events. we udevadm settle + # to hope to resolve that. Better here would probably be to test and see, + # and then settle if we didn't find anything and try again. + if util.which("udevadm"): + util.subp(["udevadm", "settle"]) + + # devpath of /dev/sd[a-z] or /dev/disk/cloud/azure_resource + # where partitions are "<devpath>1" or "<devpath>-part1" or "<devpath>p1" + part1path = None + for suff in ("-part", "p", ""): + cand = devpath + suff + "1" + if os.path.exists(cand): + if os.path.exists(devpath + suff + "2"): + msg = ('device %s had more than 1 partition: %s, %s' % + (devpath, cand, devpath + suff + "2")) + return False, msg + part1path = cand break - if device_location is None: - LOG.debug("no azure resource disk partition path found") - return None - ntfs_devices = util.find_devs_with("TYPE=ntfs") - real_device = os.path.realpath(device_location) - if real_device in ntfs_devices: - return device_location - LOG.debug("'%s' existed (%s) but was not ntfs formated", - device_location, real_device) - return None - - -def find_fabric_formatted_ephemeral_disk(): - """ - Get the ephemeral disk. 
- """ - part_dev = find_fabric_formatted_ephemeral_part() - if part_dev: - return part_dev.split('-')[0] - return None + if part1path is None: + return False, 'device %s was not partitioned' % devpath -def support_new_ephemeral(cfg): - """ - Windows Azure makes ephemeral devices ephemeral to boot; a ephemeral device - may be presented as a fresh device, or not. + real_part1path = os.path.realpath(part1path) + ntfs_devices = util.find_devs_with("TYPE=ntfs", no_cache=True) + LOG.debug('ntfs_devices found = %s', ntfs_devices) + if real_part1path not in ntfs_devices: + msg = ('partition 1 (%s -> %s) on device %s was not ntfs formatted' % + (part1path, real_part1path, devpath)) + return False, msg - Since the knowledge of when a disk is supposed to be plowed under is - specific to Windows Azure, the logic resides here in the datasource. When a - new ephemeral device is detected, cloud-init overrides the default - frequency for both disk-setup and mounts for the current boot only. - """ - device = find_fabric_formatted_ephemeral_part() - if not device: - LOG.debug("no default fabric formated ephemeral0.1 found") - return None - LOG.debug("fabric formated ephemeral0.1 device at %s", device) + def count_files(mp): + ignored = {'dataloss_warning_readme.txt'} + return len([f for f in os.listdir(mp) if f.lower() not in ignored]) - file_count = 0 + bmsg = ('partition 1 (%s -> %s) on device %s was ntfs formatted' % + (part1path, real_part1path, devpath)) try: - file_count = util.mount_cb(device, count_files) - except Exception: - return None - LOG.debug("fabric prepared ephmeral0.1 has %s files on it", file_count) - - if file_count >= 1: - LOG.debug("fabric prepared ephemeral0.1 will be preserved") - return None + file_count = util.mount_cb(part1path, count_files) + except util.MountFailedError as e: + return False, bmsg + ' but mount of %s failed: %s' % (part1path, e) + + if file_count != 0: + return False, bmsg + ' but had %d files on it.' 
% file_count + + return True, bmsg + ' and had no important files. Safe for reformatting.' + + +def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120, + is_new_instance=False): + # wait for ephemeral disk to come up + naplen = .2 + missing = wait_for_files([devpath], maxwait=maxwait, naplen=naplen, + log_pre="Azure ephemeral disk: ") + + if missing: + LOG.warn("ephemeral device '%s' did not appear after %d seconds.", + devpath, maxwait) + return + + result = False + msg = None + if is_new_instance: + result, msg = (True, "First instance boot.") else: - # if device was already mounted, then we need to unmount it - # race conditions could allow for a check-then-unmount - # to have a false positive. so just unmount and then check. - try: - util.subp(['umount', device]) - except util.ProcessExecutionError as e: - if device in util.mounts(): - LOG.warn("Failed to unmount %s, will not reformat.", device) - LOG.debug("Failed umount: %s", e) - return None - - LOG.debug("cloud-init will format ephemeral0.1 this boot.") - LOG.debug("setting disk_setup and mounts modules 'always' for this boot") - - cc_modules = cfg.get('cloud_init_modules') - if not cc_modules: - return None - - mod_list = [] - for mod in cc_modules: - if mod in ("disk_setup", "mounts"): - mod_list.append([mod, PER_ALWAYS]) - LOG.debug("set module '%s' to 'always' for this boot", mod) + result, msg = can_dev_be_reformatted(devpath) + + LOG.debug("reformattable=%s: %s" % (result, msg)) + if not result: + return + + for mod in ['disk_setup', 'mounts']: + sempath = '/var/lib/cloud/instance/sem/config_' + mod + bmsg = 'Marker "%s" for module "%s"' % (sempath, mod) + if os.path.exists(sempath): + try: + os.unlink(sempath) + LOG.debug(bmsg + " removed.") + except Exception as e: + # python3 throws FileNotFoundError, python2 throws OSError + LOG.warn(bmsg + ": remove failed! 
(%s)" % e) else: - mod_list.append(mod) - return mod_list + LOG.debug(bmsg + " did not exist.") + return def perform_hostname_bounce(hostname, cfg, prev_hostname): @@ -408,15 +407,25 @@ def pubkeys_from_crt_files(flist): return pubkeys -def wait_for_files(flist, maxwait=60, naplen=.5): +def wait_for_files(flist, maxwait=60, naplen=.5, log_pre=""): need = set(flist) waited = 0 - while waited < maxwait: + while True: need -= set([f for f in need if os.path.exists(f)]) if len(need) == 0: + LOG.debug("%sAll files appeared after %s seconds: %s", + log_pre, waited, flist) return [] + if waited == 0: + LOG.info("%sWaiting up to %s seconds for the following files: %s", + log_pre, maxwait, flist) + if waited + naplen > maxwait: + break time.sleep(naplen) waited += naplen + + LOG.warn("%sStill missing files after %s seconds: %s", + log_pre, maxwait, need) return need diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index d1395270..13fb7c62 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -261,6 +261,18 @@ class DataSource(object): def first_instance_boot(self): return + def activate(self, cfg, is_new_instance): + """activate(cfg, is_new_instance) + + This is called before the init_modules will be called. + The cfg is fully up to date config, it contains a merged view of + system config, datasource config, user config, vendor config. + It should be used rather than the sys_cfg passed to __init__. + + is_new_instance is a boolean indicating if this is a new instance. 
+ """ + return + def normalize_pubkey_data(pubkey_data): keys = [] diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 47deac6e..86a13785 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -371,6 +371,13 @@ class Init(object): self._store_userdata() self._store_vendordata() + def activate_datasource(self): + if self.datasource is None: + raise RuntimeError("Datasource is None, cannot activate.") + self.datasource.activate(cfg=self.cfg, + is_new_instance=self.is_new_instance()) + self._write_to_cache() + def _store_userdata(self): raw_ud = self.datasource.get_userdata_raw() if raw_ud is None: |