diff options
| -rw-r--r-- | cloudinit/cmd/main.py | 3 | ||||
| -rw-r--r-- | cloudinit/config/cc_mounts.py | 12 | ||||
| -rw-r--r-- | cloudinit/sources/DataSourceAzure.py | 199 | ||||
| -rw-r--r-- | cloudinit/sources/__init__.py | 12 | ||||
| -rw-r--r-- | cloudinit/stages.py | 7 | ||||
| -rw-r--r-- | tests/unittests/test_datasource/test_azure.py | 10 | 
6 files changed, 136 insertions, 107 deletions
| diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 83eb02c9..fe37075e 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -326,6 +326,9 @@ def main_init(name, args):          util.logexc(LOG, "Failed to re-adjust output redirection!")      logging.setupLogging(mods.cfg) +    # give the activated datasource a chance to adjust +    init.activate_datasource() +      # Stage 10      return (init.datasource, run_module_section(mods, name, name)) diff --git a/cloudinit/config/cc_mounts.py b/cloudinit/config/cc_mounts.py index dfc4b598..452c9e83 100644 --- a/cloudinit/config/cc_mounts.py +++ b/cloudinit/config/cc_mounts.py @@ -312,7 +312,8 @@ def handle_swapcfg(swapcfg):  def handle(_name, cfg, cloud, log, _args):      # fs_spec, fs_file, fs_vfstype, fs_mntops, fs-freq, fs_passno      def_mnt_opts = "defaults,nobootwait" -    if cloud.distro.uses_systemd(): +    uses_systemd = cloud.distro.uses_systemd() +    if uses_systemd:          def_mnt_opts = "defaults,nofail,x-systemd.requires=cloud-init.service"      defvals = [None, None, "auto", def_mnt_opts, "0", "2"] @@ -447,7 +448,12 @@ def handle(_name, cfg, cloud, log, _args):          except Exception:              util.logexc(log, "Failed to make '%s' config-mount", d) +    activate_cmd = ["mount", "-a"] +    if uses_systemd: +        activate_cmd = ["systemctl", "daemon-reload"] +    fmt = "Activate mounts: %s:" + ' '.join(activate_cmd)      try: -        util.subp(("mount", "-a")) +        util.subp(activate_cmd) +        LOG.debug(fmt, "PASS")      except util.ProcessExecutionError: -        util.logexc(log, "Activating mounts via 'mount -a' failed") +        util.logexc(log, fmt, "FAIL") diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index b802b03e..22f9004a 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -19,7 +19,6 @@  import base64  import contextlib  import crypt -import fnmatch  from functools import partial  import os  import os.path @@ -28,7 +27,6 @@ from xml.dom import minidom  import xml.etree.ElementTree as ET  from cloudinit import log as logging -from cloudinit.settings import PER_ALWAYS  from cloudinit import sources  from cloudinit.sources.helpers.azure import get_metadata_from_fabric  from cloudinit import util @@ -42,6 +40,9 @@ BOUNCE_COMMAND = [      'sh', '-xc',      "i=$interface; x=0; ifdown $i || x=$?; ifup $i || x=$?; exit $x"  ] +# azure systems will always have a resource disk, and 66-azure-ephemeral.rules +# ensures that it gets linked to this path. +RESOURCE_DISK_PATH = '/dev/disk/cloud/azure_resource'  BUILTIN_DS_CONFIG = {      'agent_command': AGENT_START, @@ -53,7 +54,7 @@ BUILTIN_DS_CONFIG = {          'command': BOUNCE_COMMAND,          'hostname_command': 'hostname',      }, -    'disk_aliases': {'ephemeral0': '/dev/sdb'}, +    'disk_aliases': {'ephemeral0': RESOURCE_DISK_PATH},      'dhclient_lease_file': '/var/lib/dhcp/dhclient.eth0.leases',  } @@ -245,15 +246,6 @@ class DataSourceAzureNet(sources.DataSource):          self.metadata['instance-id'] = util.read_dmi_data('system-uuid')          self.metadata.update(fabric_data) -        found_ephemeral = find_fabric_formatted_ephemeral_disk() -        if found_ephemeral: -            self.ds_cfg['disk_aliases']['ephemeral0'] = found_ephemeral -            LOG.debug("using detected ephemeral0 of %s", found_ephemeral) - -        cc_modules_override = support_new_ephemeral(self.sys_cfg) -        if cc_modules_override: -            self.cfg['cloud_init_modules'] = cc_modules_override -          return True      def device_name_to_device(self, name): @@ -266,97 +258,104 @@ class DataSourceAzureNet(sources.DataSource):          # quickly (local check only) if self.instance_id is still valid          return sources.instance_id_matches_system_uuid(self.get_instance_id()) - -def count_files(mp): -    return len(fnmatch.filter(os.listdir(mp), '*[!cdrom]*')) +    def activate(self, cfg, is_new_instance): +        address_ephemeral_resize(is_new_instance=is_new_instance) +        return -def find_fabric_formatted_ephemeral_part(): -    """ -    Locate the first fabric formatted ephemeral device. -    """ -    potential_locations = ['/dev/disk/cloud/azure_resource-part1', -                           '/dev/disk/azure/resource-part1'] -    device_location = None -    for potential_location in potential_locations: -        if os.path.exists(potential_location): -            device_location = potential_location +def can_dev_be_reformatted(devpath): +    # determine if the ephemeral block device path devpath +    # is newly formatted after a resize. +    if not os.path.exists(devpath): +        return False, 'device %s does not exist' % devpath + +    realpath = os.path.realpath(devpath) +    LOG.debug('Resolving realpath of %s -> %s', devpath, realpath) + +    # it is possible that the block device might exist, but the kernel +    # have not yet read the partition table and sent events.  we udevadm settle +    # to hope to resolve that.  Better here would probably be to test and see, +    # and then settle if we didn't find anything and try again. +    if util.which("udevadm"): +        util.subp(["udevadm", "settle"]) + +    # devpath of /dev/sd[a-z] or /dev/disk/cloud/azure_resource +    # where partitions are "<devpath>1" or "<devpath>-part1" or "<devpath>p1" +    part1path = None +    for suff in ("-part", "p", ""): +        cand = devpath + suff + "1" +        if os.path.exists(cand): +            if os.path.exists(devpath + suff + "2"): +                msg = ('device %s had more than 1 partition: %s, %s' % +                       devpath, cand, devpath + suff + "2") +                return False, msg +            part1path = cand              break -    if device_location is None: -        LOG.debug("no azure resource disk partition path found") -        return None -    ntfs_devices = util.find_devs_with("TYPE=ntfs") -    real_device = os.path.realpath(device_location) -    if real_device in ntfs_devices: -        return device_location -    LOG.debug("'%s' existed (%s) but was not ntfs formated", -              device_location, real_device) -    return None - - -def find_fabric_formatted_ephemeral_disk(): -    """ -    Get the ephemeral disk. -    """ -    part_dev = find_fabric_formatted_ephemeral_part() -    if part_dev: -        return part_dev.split('-')[0] -    return None +    if part1path is None: +        return False, 'device %s was not partitioned' % devpath -def support_new_ephemeral(cfg): -    """ -    Windows Azure makes ephemeral devices ephemeral to boot; a ephemeral device -    may be presented as a fresh device, or not. +    real_part1path = os.path.realpath(part1path) +    ntfs_devices = util.find_devs_with("TYPE=ntfs", no_cache=True) +    LOG.debug('ntfs_devices found = %s', ntfs_devices) +    if real_part1path not in ntfs_devices: +        msg = ('partition 1 (%s -> %s) on device %s was not ntfs formatted' % +               (part1path, real_part1path, devpath)) +        return False, msg -    Since the knowledge of when a disk is supposed to be plowed under is -    specific to Windows Azure, the logic resides here in the datasource. When a -    new ephemeral device is detected, cloud-init overrides the default -    frequency for both disk-setup and mounts for the current boot only. -    """ -    device = find_fabric_formatted_ephemeral_part() -    if not device: -        LOG.debug("no default fabric formated ephemeral0.1 found") -        return None -    LOG.debug("fabric formated ephemeral0.1 device at %s", device) +    def count_files(mp): +        ignored = {'dataloss_warning_readme.txt'} +        return len([f for f in os.listdir(mp) if f.lower() not in ignored]) -    file_count = 0 +    bmsg = ('partition 1 (%s -> %s) on device %s was ntfs formatted' % +            (part1path, real_part1path, devpath))      try: -        file_count = util.mount_cb(device, count_files) -    except Exception: -        return None -    LOG.debug("fabric prepared ephmeral0.1 has %s files on it", file_count) - -    if file_count >= 1: -        LOG.debug("fabric prepared ephemeral0.1 will be preserved") -        return None +        file_count = util.mount_cb(part1path, count_files) +    except util.MountFailedError as e: +        return False, bmsg + ' but mount of %s failed: %s' % (part1path, e) + +    if file_count != 0: +        return False, bmsg + ' but had %d files on it.' % file_count + +    return True, bmsg + ' and had no important files. Safe for reformatting.' + + +def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120, +                             is_new_instance=False): +    # wait for ephemeral disk to come up +    naplen = .2 +    missing = wait_for_files([devpath], maxwait=maxwait, naplen=naplen, +                             log_pre="Azure ephemeral disk: ") + +    if missing: +        LOG.warn("ephemeral device '%s' did not appear after %d seconds.", +                 devpath, maxwait) +        return + +    result = False +    msg = None +    if is_new_instance: +        result, msg = (True, "First instance boot.")      else: -        # if device was already mounted, then we need to unmount it -        # race conditions could allow for a check-then-unmount -        # to have a false positive. so just unmount and then check. -        try: -            util.subp(['umount', device]) -        except util.ProcessExecutionError as e: -            if device in util.mounts(): -                LOG.warn("Failed to unmount %s, will not reformat.", device) -                LOG.debug("Failed umount: %s", e) -                return None - -    LOG.debug("cloud-init will format ephemeral0.1 this boot.") -    LOG.debug("setting disk_setup and mounts modules 'always' for this boot") - -    cc_modules = cfg.get('cloud_init_modules') -    if not cc_modules: -        return None - -    mod_list = [] -    for mod in cc_modules: -        if mod in ("disk_setup", "mounts"): -            mod_list.append([mod, PER_ALWAYS]) -            LOG.debug("set module '%s' to 'always' for this boot", mod) +        result, msg = can_dev_be_reformatted(devpath) + +    LOG.debug("reformattable=%s: %s" % (result, msg)) +    if not result: +        return + +    for mod in ['disk_setup', 'mounts']: +        sempath = '/var/lib/cloud/instance/sem/config_' + mod +        bmsg = 'Marker "%s" for module "%s"' % (sempath, mod) +        if os.path.exists(sempath): +            try: +                os.unlink(sempath) +                LOG.debug(bmsg + " removed.") +            except Exception as e: +                # python3 throws FileNotFoundError, python2 throws OSError +                LOG.warn(bmsg + ": remove failed! (%s)" % e)          else: -            mod_list.append(mod) -    return mod_list +            LOG.debug(bmsg + " did not exist.") +    return  def perform_hostname_bounce(hostname, cfg, prev_hostname): @@ -408,15 +407,25 @@ def pubkeys_from_crt_files(flist):      return pubkeys -def wait_for_files(flist, maxwait=60, naplen=.5): +def wait_for_files(flist, maxwait=60, naplen=.5, log_pre=""):      need = set(flist)      waited = 0 -    while waited < maxwait: +    while True:          need -= set([f for f in need if os.path.exists(f)])          if len(need) == 0: +            LOG.debug("%sAll files appeared after %s seconds: %s", +                      log_pre, waited, flist)              return [] +        if waited == 0: +            LOG.info("%sWaiting up to %s seconds for the following files: %s", +                     log_pre, maxwait, flist) +        if waited + naplen > maxwait: +            break          time.sleep(naplen)          waited += naplen + +    LOG.warn("%sStill missing files after %s seconds: %s", +             log_pre, maxwait, need)      return need diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index d1395270..13fb7c62 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -261,6 +261,18 @@ class DataSource(object):      def first_instance_boot(self):          return +    def activate(self, cfg, is_new_instance): +        """activate(cfg, is_new_instance) + +        This is called before the init_modules will be called. +        The cfg is fully up to date config, it contains a merged view of +           system config, datasource config, user config, vendor config. +        It should be used rather than the sys_cfg passed to __init__. + +        is_new_instance is a boolean indicating if this is a new instance. +        """ +        return +  def normalize_pubkey_data(pubkey_data):      keys = [] diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 47deac6e..86a13785 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -371,6 +371,13 @@ class Init(object):          self._store_userdata()          self._store_vendordata() +    def activate_datasource(self): +        if self.datasource is None: +            raise RuntimeError("Datasource is None, cannot activate.") +        self.datasource.activate(cfg=self.cfg, +                                 is_new_instance=self.is_new_instance()) +        self._write_to_cache() +      def _store_userdata(self):          raw_ud = self.datasource.get_userdata_raw()          if raw_ud is None: diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index e90e903c..07127008 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -349,7 +349,7 @@ class TestAzureDataSource(TestCase):          cfg = dsrc.get_config_obj()          self.assertEqual(dsrc.device_name_to_device("ephemeral0"), -                         "/dev/sdb") +                         DataSourceAzure.RESOURCE_DISK_PATH)          assert 'disk_setup' in cfg          assert 'fs_setup' in cfg          self.assertIsInstance(cfg['disk_setup'], dict) @@ -462,14 +462,6 @@ class TestAzureBounce(TestCase):              mock.patch.object(DataSourceAzure, 'list_possible_azure_ds_devs',                                mock.MagicMock(return_value=[])))          self.patches.enter_context( -            mock.patch.object(DataSourceAzure, -                              'find_fabric_formatted_ephemeral_disk', -                              mock.MagicMock(return_value=None))) -        self.patches.enter_context( -            mock.patch.object(DataSourceAzure, -                              'find_fabric_formatted_ephemeral_part', -                              mock.MagicMock(return_value=None))) -        self.patches.enter_context(              mock.patch.object(DataSourceAzure, 'get_metadata_from_fabric',                                mock.MagicMock(return_value={})))          self.patches.enter_context( | 
