# vi: ts=4 expandtab # # Copyright (C) 2013 Canonical Ltd. # # Author: Scott Moser # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License version 3, as # published by the Free Software Foundation. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . import base64 import contextlib import crypt import fnmatch import os import os.path import time import xml.etree.ElementTree as ET from xml.dom import minidom from cloudinit import log as logging from cloudinit.settings import PER_ALWAYS from cloudinit import sources from cloudinit import util from cloudinit.sources.helpers.azure import ( get_metadata_from_fabric, iid_from_shared_config_content) LOG = logging.getLogger(__name__) DS_NAME = 'Azure' DEFAULT_METADATA = {"instance-id": "iid-AZURE-NODE"} AGENT_START = ['service', 'walinuxagent', 'start'] BOUNCE_COMMAND = ['sh', '-xc', "i=$interface; x=0; ifdown $i || x=$?; ifup $i || x=$?; exit $x"] DATA_DIR_CLEAN_LIST = ['SharedConfig.xml'] BUILTIN_DS_CONFIG = { 'agent_command': AGENT_START, 'data_dir': "/var/lib/waagent", 'set_hostname': True, 'hostname_bounce': { 'interface': 'eth0', 'policy': True, 'command': BOUNCE_COMMAND, 'hostname_command': 'hostname', }, 'disk_aliases': {'ephemeral0': '/dev/sdb'}, } BUILTIN_CLOUD_CONFIG = { 'disk_setup': { 'ephemeral0': {'table_type': 'gpt', 'layout': [100], 'overwrite': True}, }, 'fs_setup': [{'filesystem': 'ext4', 'device': 'ephemeral0.1', 'replace_fs': 'ntfs'}], } DS_CFG_PATH = ['datasource', DS_NAME] DEF_EPHEMERAL_LABEL = 'Temporary Storage' # The redacted password fails to meet password complexity requirements # so we can safely use this to mask/redact the password in the ovf-env.xml DEF_PASSWD_REDACTION = 'REDACTED' def get_hostname(hostname_command='hostname'): return util.subp(hostname_command, capture=True)[0].strip() def set_hostname(hostname, hostname_command='hostname'): util.subp([hostname_command, hostname]) @contextlib.contextmanager def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'): """ Set a temporary hostname, restoring the previous hostname on exit. Will have the value of the previous hostname when used as a context manager, or None if the hostname was not changed. """ policy = cfg['hostname_bounce']['policy'] previous_hostname = get_hostname(hostname_command) if (not util.is_true(cfg.get('set_hostname')) or util.is_false(policy) or (previous_hostname == temp_hostname and policy != 'force')): yield None return set_hostname(temp_hostname, hostname_command) try: yield previous_hostname finally: set_hostname(previous_hostname, hostname_command) class DataSourceAzureNet(sources.DataSource): def __init__(self, sys_cfg, distro, paths): sources.DataSource.__init__(self, sys_cfg, distro, paths) self.seed_dir = os.path.join(paths.seed_dir, 'azure') self.cfg = {} self.seed = None self.ds_cfg = util.mergemanydict([ util.get_cfg_by_path(sys_cfg, DS_CFG_PATH, {}), BUILTIN_DS_CONFIG]) def __str__(self): root = sources.DataSource.__str__(self) return "%s [seed=%s]" % (root, self.seed) def get_metadata_from_agent(self): temp_hostname = self.metadata.get('local-hostname') hostname_command = self.ds_cfg['hostname_bounce']['hostname_command'] with temporary_hostname(temp_hostname, self.ds_cfg, hostname_command=hostname_command) \ as previous_hostname: if (previous_hostname is not None and util.is_true(self.ds_cfg.get('set_hostname'))): cfg = self.ds_cfg['hostname_bounce'] try: perform_hostname_bounce(hostname=temp_hostname, cfg=cfg, prev_hostname=previous_hostname) except Exception as e: LOG.warn("Failed publishing hostname: %s", e) util.logexc(LOG, "handling set_hostname failed") try: invoke_agent(self.ds_cfg['agent_command']) except util.ProcessExecutionError: # claim the datasource even if the command failed util.logexc(LOG, "agent command '%s' failed.", self.ds_cfg['agent_command']) ddir = self.ds_cfg['data_dir'] shcfgxml = os.path.join(ddir, "SharedConfig.xml") wait_for = [shcfgxml] fp_files = [] for pk in self.cfg.get('_pubkeys', []): bname = str(pk['fingerprint'] + ".crt") fp_files += [os.path.join(ddir, bname)] missing = util.log_time(logfunc=LOG.debug, msg="waiting for files", func=wait_for_files, args=(wait_for + fp_files,)) if len(missing): LOG.warn("Did not find files, but going on: %s", missing) metadata = {} if shcfgxml in missing: LOG.warn("SharedConfig.xml missing, using static instance-id") else: try: metadata['instance-id'] = iid_from_shared_config(shcfgxml) except ValueError as e: LOG.warn("failed to get instance id in %s: %s", shcfgxml, e) metadata['public-keys'] = pubkeys_from_crt_files(fp_files) return metadata def get_data(self): # azure removes/ejects the cdrom containing the ovf-env.xml # file on reboot. So, in order to successfully reboot we # need to look in the datadir and consider that valid ddir = self.ds_cfg['data_dir'] candidates = [self.seed_dir] candidates.extend(list_possible_azure_ds_devs()) if ddir: candidates.append(ddir) found = None for cdev in candidates: try: if cdev.startswith("/dev/"): ret = util.mount_cb(cdev, load_azure_ds_dir) else: ret = load_azure_ds_dir(cdev) except NonAzureDataSource: continue except BrokenAzureDataSource as exc: raise exc except util.MountFailedError: LOG.warn("%s was not mountable", cdev) continue (md, self.userdata_raw, cfg, files) = ret self.seed = cdev self.metadata = util.mergemanydict([md, DEFAULT_METADATA]) self.cfg = util.mergemanydict([cfg, BUILTIN_CLOUD_CONFIG]) found = cdev LOG.debug("found datasource in %s", cdev) break if not found: return False if found == ddir: LOG.debug("using files cached in %s", ddir) # azure / hyper-v provides random data here seed = util.load_file("/sys/firmware/acpi/tables/OEM0", quiet=True, decode=False) if seed: self.metadata['random_seed'] = seed # now update ds_cfg to reflect contents pass in config user_ds_cfg = util.get_cfg_by_path(self.cfg, DS_CFG_PATH, {}) self.ds_cfg = util.mergemanydict([user_ds_cfg, self.ds_cfg]) if found != ddir: cached_ovfenv = util.load_file( os.path.join(ddir, 'ovf-env.xml'), quiet=True, decode=False) if cached_ovfenv != files['ovf-env.xml']: # source was not walinux-agent's datadir, so we have to clean # up so 'wait_for_files' doesn't return early due to stale data cleaned = [] for f in [os.path.join(ddir, f) for f in DATA_DIR_CLEAN_LIST]: if os.path.exists(f): util.del_file(f) cleaned.append(f) if cleaned: LOG.info("removed stale file(s) in '%s': %s", ddir, str(cleaned)) # walinux agent writes files world readable, but expects # the directory to be protected. write_files(ddir, files, dirmode=0o700) if self.ds_cfg['agent_command'] == '__builtin__': metadata_func = get_metadata_from_fabric else: metadata_func = self.get_metadata_from_agent try: fabric_data = metadata_func() except Exception as exc: LOG.info("Error communicating with Azure fabric; assume we aren't" " on Azure.", exc_info=True) return False self.metadata.update(fabric_data) found_ephemeral = find_ephemeral_disk() if found_ephemeral: self.ds_cfg['disk_aliases']['ephemeral0'] = found_ephemeral LOG.debug("using detected ephemeral0 of %s", found_ephemeral) cc_modules_override = support_new_ephemeral(self.sys_cfg) if cc_modules_override: self.cfg['cloud_config_modules'] = cc_modules_override return True def device_name_to_device(self, name): return self.ds_cfg['disk_aliases'].get(name) def get_config_obj(self): return self.cfg def count_files(mp): return len(fnmatch.filter(os.listdir(mp), '*[!cdrom]*')) def find_ephemeral_part(): """ Locate the default ephmeral0.1 device. This will be the first device that has a LABEL of DEF_EPHEMERAL_LABEL and is a NTFS device. If Azure gets more ephemeral devices, this logic will only identify the first such device. """ c_label_devs = util.find_devs_with("LABEL=%s" % DEF_EPHEMERAL_LABEL) c_fstype_devs = util.find_devs_with("TYPE=ntfs") for dev in c_label_devs: if dev in c_fstype_devs: return dev return None def find_ephemeral_disk(): """ Get the ephemeral disk. """ part_dev = find_ephemeral_part() if part_dev and str(part_dev[-1]).isdigit(): return part_dev[:-1] elif part_dev: return part_dev return None def support_new_ephemeral(cfg): """ Windows Azure makes ephemeral devices ephemeral to boot; a ephemeral device may be presented as a fresh device, or not. Since the knowledge of when a disk is supposed to be plowed under is specific to Windows Azure, the logic resides here in the datasource. When a new ephemeral device is detected, cloud-init overrides the default frequency for both disk-setup and mounts for the current boot only. """ device = find_ephemeral_part() if not device: LOG.debug("no default fabric formated ephemeral0.1 found") return None LOG.debug("fabric formated ephemeral0.1 device at %s", device) file_count = 0 try: file_count = util.mount_cb(device, count_files) except: return None LOG.debug("fabric prepared ephmeral0.1 has %s files on it", file_count) if file_count >= 1: LOG.debug("fabric prepared ephemeral0.1 will be preserved") return None else: # if device was already mounted, then we need to unmount it # race conditions could allow for a check-then-unmount # to have a false positive. so just unmount and then check. try: util.subp(['umount', device]) except util.ProcessExecutionError as e: if device in util.mounts(): LOG.warn("Failed to unmount %s, will not reformat.", device) LOG.debug("Failed umount: %s", e) return None LOG.debug("cloud-init will format ephemeral0.1 this boot.") LOG.debug("setting disk_setup and mounts modules 'always' for this boot") cc_modules = cfg.get('cloud_config_modules') if not cc_modules: return None mod_list = [] for mod in cc_modules: if mod in ("disk_setup", "mounts"): mod_list.append([mod, PER_ALWAYS]) LOG.debug("set module '%s' to 'always' for this boot", mod) else: mod_list.append(mod) return mod_list def perform_hostname_bounce(hostname, cfg, prev_hostname): # set the hostname to 'hostname' if it is not already set to that. # then, if policy is not off, bounce the interface using command command = cfg['command'] interface = cfg['interface'] policy = cfg['policy'] msg = ("hostname=%s policy=%s interface=%s" % (hostname, policy, interface)) env = os.environ.copy() env['interface'] = interface env['hostname'] = hostname env['old_hostname'] = prev_hostname if command == "builtin": command = BOUNCE_COMMAND LOG.debug("pubhname: publishing hostname [%s]", msg) shell = not isinstance(command, (list, tuple)) # capture=False, see comments in bug 1202758 and bug 1206164. util.log_time(logfunc=LOG.debug, msg="publishing hostname", get_uptime=True, func=util.subp, kwargs={'args': command, 'shell': shell, 'capture': False, 'env': env}) def crtfile_to_pubkey(fname, data=None): pipeline = ('openssl x509 -noout -pubkey < "$0" |' 'ssh-keygen -i -m PKCS8 -f /dev/stdin') (out, _err) = util.subp(['sh', '-c', pipeline, fname], capture=True, data=data) return out.rstrip() def pubkeys_from_crt_files(flist): pubkeys = [] errors = [] for fname in flist: try: pubkeys.append(crtfile_to_pubkey(fname)) except util.ProcessExecutionError: errors.append(fname) if errors: LOG.warn("failed to convert the crt files to pubkey: %s", errors) return pubkeys def wait_for_files(flist, maxwait=60, naplen=.5): need = set(flist) waited = 0 while waited < maxwait: need -= set([f for f in need if os.path.exists(f)]) if len(need) == 0: return [] time.sleep(naplen) waited += naplen return need def write_files(datadir, files, dirmode=None): def _redact_password(cnt, fname): """Azure provides the UserPassword in plain text. So we redact it""" try: root = ET.fromstring(cnt) for elem in root.iter(): if ('UserPassword' in elem.tag and elem.text != DEF_PASSWD_REDACTION): elem.text = DEF_PASSWD_REDACTION return ET.tostring(root) except Exception as e: LOG.critical("failed to redact userpassword in {}".format(fname)) return cnt if not datadir: return if not files: files = {} util.ensure_dir(datadir, dirmode) for (name, content) in files.items(): fname = os.path.join(datadir, name) if 'ovf-env.xml' in name: content = _redact_password(content, fname) util.write_file(filename=fname, content=content, mode=0o600) def invoke_agent(cmd): # this is a function itself to simplify patching it for test if cmd: LOG.debug("invoking agent: %s", cmd) util.subp(cmd, shell=(not isinstance(cmd, list))) else: LOG.debug("not invoking agent") def find_child(node, filter_func): ret = [] if not node.hasChildNodes(): return ret for child in node.childNodes: if filter_func(child): ret.append(child) return ret def load_azure_ovf_pubkeys(sshnode): # This parses a 'SSH' node formatted like below, and returns # an array of dicts. # [{'fp': '6BE7A7C3C8A8F4B123CCA5D0C2F1BE4CA7B63ED7', # 'path': 'where/to/go'}] # # # ABC/ABC # ... # results = find_child(sshnode, lambda n: n.localName == "PublicKeys") if len(results) == 0: return [] if len(results) > 1: raise BrokenAzureDataSource("Multiple 'PublicKeys'(%s) in SSH node" % len(results)) pubkeys_node = results[0] pubkeys = find_child(pubkeys_node, lambda n: n.localName == "PublicKey") if len(pubkeys) == 0: return [] found = [] text_node = minidom.Document.TEXT_NODE for pk_node in pubkeys: if not pk_node.hasChildNodes(): continue cur = {'fingerprint': "", 'path': ""} for child in pk_node.childNodes: if child.nodeType == text_node or not child.localName: continue name = child.localName.lower() if name not in cur.keys(): continue if (len(child.childNodes) != 1 or child.childNodes[0].nodeType != text_node): continue cur[name] = child.childNodes[0].wholeText.strip() found.append(cur) return found def read_azure_ovf(contents): try: dom = minidom.parseString(contents) except Exception as e: raise BrokenAzureDataSource("invalid xml: %s" % e) results = find_child(dom.documentElement, lambda n: n.localName == "ProvisioningSection") if len(results) == 0: raise NonAzureDataSource("No ProvisioningSection") if len(results) > 1: raise BrokenAzureDataSource("found '%d' ProvisioningSection items" % len(results)) provSection = results[0] lpcs_nodes = find_child(provSection, lambda n: n.localName == "LinuxProvisioningConfigurationSet") if len(results) == 0: raise NonAzureDataSource("No LinuxProvisioningConfigurationSet") if len(results) > 1: raise BrokenAzureDataSource("found '%d' %ss" % ("LinuxProvisioningConfigurationSet", len(results))) lpcs = lpcs_nodes[0] if not lpcs.hasChildNodes(): raise BrokenAzureDataSource("no child nodes of configuration set") md_props = 'seedfrom' md = {'azure_data': {}} cfg = {} ud = "" password = None username = None for child in lpcs.childNodes: if child.nodeType == dom.TEXT_NODE or not child.localName: continue name = child.localName.lower() simple = False value = "" if (len(child.childNodes) == 1 and child.childNodes[0].nodeType == dom.TEXT_NODE): simple = True value = child.childNodes[0].wholeText attrs = dict([(k, v) for k, v in child.attributes.items()]) # we accept either UserData or CustomData. If both are present # then behavior is undefined. if name == "userdata" or name == "customdata": if attrs.get('encoding') in (None, "base64"): ud = base64.b64decode(''.join(value.split())) else: ud = value elif name == "username": username = value elif name == "userpassword": password = value elif name == "hostname": md['local-hostname'] = value elif name == "dscfg": if attrs.get('encoding') in (None, "base64"): dscfg = base64.b64decode(''.join(value.split())) else: dscfg = value cfg['datasource'] = {DS_NAME: util.load_yaml(dscfg, default={})} elif name == "ssh": cfg['_pubkeys'] = load_azure_ovf_pubkeys(child) elif name == "disablesshpasswordauthentication": cfg['ssh_pwauth'] = util.is_false(value) elif simple: if name in md_props: md[name] = value else: md['azure_data'][name] = value defuser = {} if username: defuser['name'] = username if password and DEF_PASSWD_REDACTION != password: defuser['passwd'] = encrypt_pass(password) defuser['lock_passwd'] = False if defuser: cfg['system_info'] = {'default_user': defuser} if 'ssh_pwauth' not in cfg and password: cfg['ssh_pwauth'] = True return (md, ud, cfg) def encrypt_pass(password, salt_id="$6$"): return crypt.crypt(password, salt_id + util.rand_str(strlen=16)) def list_possible_azure_ds_devs(): # return a sorted list of devices that might have a azure datasource devlist = [] for fstype in ("iso9660", "udf"): devlist.extend(util.find_devs_with("TYPE=%s" % fstype)) devlist.sort(reverse=True) return devlist def load_azure_ds_dir(source_dir): ovf_file = os.path.join(source_dir, "ovf-env.xml") if not os.path.isfile(ovf_file): raise NonAzureDataSource("No ovf-env file found") with open(ovf_file, "rb") as fp: contents = fp.read() md, ud, cfg = read_azure_ovf(contents) return (md, ud, cfg, {'ovf-env.xml': contents}) def iid_from_shared_config(path): with open(path, "rb") as fp: content = fp.read() return iid_from_shared_config_content(content) class BrokenAzureDataSource(Exception): pass class NonAzureDataSource(Exception): pass # Used to match classes to dependencies datasources = [ (DataSourceAzureNet, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)), ] # Return a list of data sources that match this set of dependencies def get_datasource_list(depends): return sources.list_from_depends(depends, datasources)