path: root/cloudinit
diff options
author Scott Moser <> 2016-11-17 14:46:23 -0500
committer Scott Moser <> 2016-11-18 16:33:51 -0500
commit 9e904bbc3336b96475bfd00fb3bf1262ae4de49f (patch)
tree e22e9c0048d723f1b4d99634969a760c34d8da22 /cloudinit
parent 9d826b8855797bd37e477b6da43153c49529afe8 (diff)
Add activate_datasource, for datasource specific code paths.
This adds a call to 'activate_datasource', which is made during the init stage (or init-local in the event of a 'local' dsmode). It exists so that the datasource can perform any platform-specific operations that may be necessary. The datasource is passed the fully rendered cloud-config and whether or not the instance is a new instance. The Azure datasource uses this to address formatting of the ephemeral device. It does so by (a) waiting for the device to come online, and (b) removing the marker files for the disk_setup and mounts modules if this is a new instance or it finds that the ephemeral device has been reset. LP: #1611074
Diffstat (limited to 'cloudinit')
5 files changed, 135 insertions, 98 deletions
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index 83eb02c9..fe37075e 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -326,6 +326,9 @@ def main_init(name, args):
util.logexc(LOG, "Failed to re-adjust output redirection!")
+ # give the activated datasource a chance to adjust
+ init.activate_datasource()
# Stage 10
return (init.datasource, run_module_section(mods, name, name))
diff --git a/cloudinit/config/cc_mounts.py b/cloudinit/config/cc_mounts.py
index dfc4b598..452c9e83 100644
--- a/cloudinit/config/cc_mounts.py
+++ b/cloudinit/config/cc_mounts.py
@@ -312,7 +312,8 @@ def handle_swapcfg(swapcfg):
def handle(_name, cfg, cloud, log, _args):
# fs_spec, fs_file, fs_vfstype, fs_mntops, fs-freq, fs_passno
def_mnt_opts = "defaults,nobootwait"
- if cloud.distro.uses_systemd():
+ uses_systemd = cloud.distro.uses_systemd()
+ if uses_systemd:
def_mnt_opts = "defaults,nofail,x-systemd.requires=cloud-init.service"
defvals = [None, None, "auto", def_mnt_opts, "0", "2"]
@@ -447,7 +448,12 @@ def handle(_name, cfg, cloud, log, _args):
except Exception:
util.logexc(log, "Failed to make '%s' config-mount", d)
+ activate_cmd = ["mount", "-a"]
+ if uses_systemd:
+ activate_cmd = ["systemctl", "daemon-reload"]
+ fmt = "Activate mounts: %s:" + ' '.join(activate_cmd)
- util.subp(("mount", "-a"))
+ util.subp(activate_cmd)
+ LOG.debug(fmt, "PASS")
except util.ProcessExecutionError:
- util.logexc(log, "Activating mounts via 'mount -a' failed")
+ util.logexc(log, fmt, "FAIL")
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index b802b03e..22f9004a 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -19,7 +19,6 @@
import base64
import contextlib
import crypt
-import fnmatch
from functools import partial
import os
import os.path
@@ -28,7 +27,6 @@ from xml.dom import minidom
import xml.etree.ElementTree as ET
from cloudinit import log as logging
-from cloudinit.settings import PER_ALWAYS
from cloudinit import sources
from cloudinit.sources.helpers.azure import get_metadata_from_fabric
from cloudinit import util
@@ -42,6 +40,9 @@ BOUNCE_COMMAND = [
'sh', '-xc',
"i=$interface; x=0; ifdown $i || x=$?; ifup $i || x=$?; exit $x"
+# azure systems will always have a resource disk, and 66-azure-ephemeral.rules
+# ensures that it gets linked to this path.
+RESOURCE_DISK_PATH = '/dev/disk/cloud/azure_resource'
'agent_command': AGENT_START,
@@ -53,7 +54,7 @@ BUILTIN_DS_CONFIG = {
'command': BOUNCE_COMMAND,
'hostname_command': 'hostname',
- 'disk_aliases': {'ephemeral0': '/dev/sdb'},
+ 'disk_aliases': {'ephemeral0': RESOURCE_DISK_PATH},
'dhclient_lease_file': '/var/lib/dhcp/dhclient.eth0.leases',
@@ -245,15 +246,6 @@ class DataSourceAzureNet(sources.DataSource):
self.metadata['instance-id'] = util.read_dmi_data('system-uuid')
- found_ephemeral = find_fabric_formatted_ephemeral_disk()
- if found_ephemeral:
- self.ds_cfg['disk_aliases']['ephemeral0'] = found_ephemeral
- LOG.debug("using detected ephemeral0 of %s", found_ephemeral)
- cc_modules_override = support_new_ephemeral(self.sys_cfg)
- if cc_modules_override:
- self.cfg['cloud_init_modules'] = cc_modules_override
return True
def device_name_to_device(self, name):
@@ -266,97 +258,104 @@ class DataSourceAzureNet(sources.DataSource):
# quickly (local check only) if self.instance_id is still valid
return sources.instance_id_matches_system_uuid(self.get_instance_id())
-def count_files(mp):
- return len(fnmatch.filter(os.listdir(mp), '*[!cdrom]*'))
+ def activate(self, cfg, is_new_instance):
+ address_ephemeral_resize(is_new_instance=is_new_instance)
+ return
-def find_fabric_formatted_ephemeral_part():
- """
- Locate the first fabric formatted ephemeral device.
- """
- potential_locations = ['/dev/disk/cloud/azure_resource-part1',
- '/dev/disk/azure/resource-part1']
- device_location = None
- for potential_location in potential_locations:
- if os.path.exists(potential_location):
- device_location = potential_location
+def can_dev_be_reformatted(devpath):
+ # determine if the ephemeral block device path devpath
+ # is newly formatted after a resize.
+ if not os.path.exists(devpath):
+ return False, 'device %s does not exist' % devpath
+ realpath = os.path.realpath(devpath)
+ LOG.debug('Resolving realpath of %s -> %s', devpath, realpath)
+ # it is possible that the block device might exist, but the kernel
+ # have not yet read the partition table and sent events. we udevadm settle
+ # to hope to resolve that. Better here would probably be to test and see,
+ # and then settle if we didn't find anything and try again.
+ if util.which("udevadm"):
+ util.subp(["udevadm", "settle"])
+ # devpath of /dev/sd[a-z] or /dev/disk/cloud/azure_resource
+ # where partitions are "<devpath>1" or "<devpath>-part1" or "<devpath>p1"
+ part1path = None
+ for suff in ("-part", "p", ""):
+ cand = devpath + suff + "1"
+ if os.path.exists(cand):
+ if os.path.exists(devpath + suff + "2"):
+ msg = ('device %s had more than 1 partition: %s, %s' %
+ devpath, cand, devpath + suff + "2")
+ return False, msg
+ part1path = cand
- if device_location is None:
- LOG.debug("no azure resource disk partition path found")
- return None
- ntfs_devices = util.find_devs_with("TYPE=ntfs")
- real_device = os.path.realpath(device_location)
- if real_device in ntfs_devices:
- return device_location
- LOG.debug("'%s' existed (%s) but was not ntfs formated",
- device_location, real_device)
- return None
-def find_fabric_formatted_ephemeral_disk():
- """
- Get the ephemeral disk.
- """
- part_dev = find_fabric_formatted_ephemeral_part()
- if part_dev:
- return part_dev.split('-')[0]
- return None
+ if part1path is None:
+ return False, 'device %s was not partitioned' % devpath
-def support_new_ephemeral(cfg):
- """
- Windows Azure makes ephemeral devices ephemeral to boot; a ephemeral device
- may be presented as a fresh device, or not.
+ real_part1path = os.path.realpath(part1path)
+ ntfs_devices = util.find_devs_with("TYPE=ntfs", no_cache=True)
+ LOG.debug('ntfs_devices found = %s', ntfs_devices)
+ if real_part1path not in ntfs_devices:
+ msg = ('partition 1 (%s -> %s) on device %s was not ntfs formatted' %
+ (part1path, real_part1path, devpath))
+ return False, msg
- Since the knowledge of when a disk is supposed to be plowed under is
- specific to Windows Azure, the logic resides here in the datasource. When a
- new ephemeral device is detected, cloud-init overrides the default
- frequency for both disk-setup and mounts for the current boot only.
- """
- device = find_fabric_formatted_ephemeral_part()
- if not device:
- LOG.debug("no default fabric formated ephemeral0.1 found")
- return None
- LOG.debug("fabric formated ephemeral0.1 device at %s", device)
+ def count_files(mp):
+ ignored = {'dataloss_warning_readme.txt'}
+ return len([f for f in os.listdir(mp) if f.lower() not in ignored])
- file_count = 0
+ bmsg = ('partition 1 (%s -> %s) on device %s was ntfs formatted' %
+ (part1path, real_part1path, devpath))
- file_count = util.mount_cb(device, count_files)
- except Exception:
- return None
- LOG.debug("fabric prepared ephmeral0.1 has %s files on it", file_count)
- if file_count >= 1:
- LOG.debug("fabric prepared ephemeral0.1 will be preserved")
- return None
+ file_count = util.mount_cb(part1path, count_files)
+ except util.MountFailedError as e:
+ return False, bmsg + ' but mount of %s failed: %s' % (part1path, e)
+ if file_count != 0:
+ return False, bmsg + ' but had %d files on it.' % file_count
+ return True, bmsg + ' and had no important files. Safe for reformatting.'
+def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,
+ is_new_instance=False):
+ # wait for ephemeral disk to come up
+ naplen = .2
+ missing = wait_for_files([devpath], maxwait=maxwait, naplen=naplen,
+ log_pre="Azure ephemeral disk: ")
+ if missing:
+ LOG.warn("ephemeral device '%s' did not appear after %d seconds.",
+ devpath, maxwait)
+ return
+ result = False
+ msg = None
+ if is_new_instance:
+ result, msg = (True, "First instance boot.")
- # if device was already mounted, then we need to unmount it
- # race conditions could allow for a check-then-unmount
- # to have a false positive. so just unmount and then check.
- try:
- util.subp(['umount', device])
- except util.ProcessExecutionError as e:
- if device in util.mounts():
- LOG.warn("Failed to unmount %s, will not reformat.", device)
- LOG.debug("Failed umount: %s", e)
- return None
- LOG.debug("cloud-init will format ephemeral0.1 this boot.")
- LOG.debug("setting disk_setup and mounts modules 'always' for this boot")
- cc_modules = cfg.get('cloud_init_modules')
- if not cc_modules:
- return None
- mod_list = []
- for mod in cc_modules:
- if mod in ("disk_setup", "mounts"):
- mod_list.append([mod, PER_ALWAYS])
- LOG.debug("set module '%s' to 'always' for this boot", mod)
+ result, msg = can_dev_be_reformatted(devpath)
+ LOG.debug("reformattable=%s: %s" % (result, msg))
+ if not result:
+ return
+ for mod in ['disk_setup', 'mounts']:
+ sempath = '/var/lib/cloud/instance/sem/config_' + mod
+ bmsg = 'Marker "%s" for module "%s"' % (sempath, mod)
+ if os.path.exists(sempath):
+ try:
+ os.unlink(sempath)
+ LOG.debug(bmsg + " removed.")
+ except Exception as e:
+ # python3 throws FileNotFoundError, python2 throws OSError
+ LOG.warn(bmsg + ": remove failed! (%s)" % e)
- mod_list.append(mod)
- return mod_list
+ LOG.debug(bmsg + " did not exist.")
+ return
def perform_hostname_bounce(hostname, cfg, prev_hostname):
@@ -408,15 +407,25 @@ def pubkeys_from_crt_files(flist):
return pubkeys
-def wait_for_files(flist, maxwait=60, naplen=.5):
+def wait_for_files(flist, maxwait=60, naplen=.5, log_pre=""):
need = set(flist)
waited = 0
- while waited < maxwait:
+ while True:
need -= set([f for f in need if os.path.exists(f)])
if len(need) == 0:
+ LOG.debug("%sAll files appeared after %s seconds: %s",
+ log_pre, waited, flist)
return []
+ if waited == 0:
+ LOG.info("%sWaiting up to %s seconds for the following files: %s",
+ log_pre, maxwait, flist)
+ if waited + naplen > maxwait:
+ break
waited += naplen
+ LOG.warn("%sStill missing files after %s seconds: %s",
+ log_pre, maxwait, need)
return need
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index d1395270..13fb7c62 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -261,6 +261,18 @@ class DataSource(object):
def first_instance_boot(self):
+ def activate(self, cfg, is_new_instance):
+ """activate(cfg, is_new_instance)
+ This is called before the init_modules will be called.
+ The cfg is fully up to date config, it contains a merged view of
+ system config, datasource config, user config, vendor config.
+ It should be used rather than the sys_cfg passed to __init__.
+ is_new_instance is a boolean indicating if this is a new instance.
+ """
+ return
def normalize_pubkey_data(pubkey_data):
keys = []
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index 47deac6e..86a13785 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -371,6 +371,13 @@ class Init(object):
+ def activate_datasource(self):
+ if self.datasource is None:
+ raise RuntimeError("Datasource is None, cannot activate.")
+ self.datasource.activate(cfg=self.cfg,
+ is_new_instance=self.is_new_instance())
+ self._write_to_cache()
def _store_userdata(self):
raw_ud = self.datasource.get_userdata_raw()
if raw_ud is None: