diff options
Diffstat (limited to 'cloudinit/sources/DataSourceAzure.py')
-rw-r--r-- | cloudinit/sources/DataSourceAzure.py | 320 |
1 files changed, 164 insertions, 156 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 09bc196d..2af0ad9b 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -17,26 +17,30 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. import base64 +import contextlib import crypt import fnmatch import os import os.path import time +import xml.etree.ElementTree as ET + from xml.dom import minidom from cloudinit import log as logging from cloudinit.settings import PER_ALWAYS from cloudinit import sources from cloudinit import util +from cloudinit.sources.helpers.azure import get_metadata_from_fabric LOG = logging.getLogger(__name__) DS_NAME = 'Azure' DEFAULT_METADATA = {"instance-id": "iid-AZURE-NODE"} AGENT_START = ['service', 'walinuxagent', 'start'] -BOUNCE_COMMAND = ['sh', '-xc', +BOUNCE_COMMAND = [ + 'sh', '-xc', "i=$interface; x=0; ifdown $i || x=$?; ifup $i || x=$?; exit $x"] -DATA_DIR_CLEAN_LIST = ['SharedConfig.xml'] BUILTIN_DS_CONFIG = { 'agent_command': AGENT_START, @@ -53,9 +57,9 @@ BUILTIN_DS_CONFIG = { BUILTIN_CLOUD_CONFIG = { 'disk_setup': { - 'ephemeral0': {'table_type': 'mbr', - 'layout': True, - 'overwrite': False}, + 'ephemeral0': {'table_type': 'gpt', + 'layout': [100], + 'overwrite': True}, }, 'fs_setup': [{'filesystem': 'ext4', 'device': 'ephemeral0.1', @@ -65,6 +69,40 @@ BUILTIN_CLOUD_CONFIG = { DS_CFG_PATH = ['datasource', DS_NAME] DEF_EPHEMERAL_LABEL = 'Temporary Storage' +# The redacted password fails to meet password complexity requirements +# so we can safely use this to mask/redact the password in the ovf-env.xml +DEF_PASSWD_REDACTION = 'REDACTED' + + +def get_hostname(hostname_command='hostname'): + return util.subp(hostname_command, capture=True)[0].strip() + + +def set_hostname(hostname, hostname_command='hostname'): + util.subp([hostname_command, hostname]) + + +@contextlib.contextmanager +def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'): + """ + Set a temporary hostname, restoring the previous hostname on exit. + + Will have the value of the previous hostname when used as a context + manager, or None if the hostname was not changed. + """ + policy = cfg['hostname_bounce']['policy'] + previous_hostname = get_hostname(hostname_command) + if (not util.is_true(cfg.get('set_hostname')) or + util.is_false(policy) or + (previous_hostname == temp_hostname and policy != 'force')): + yield None + return + set_hostname(temp_hostname, hostname_command) + try: + yield previous_hostname + finally: + set_hostname(previous_hostname, hostname_command) + class DataSourceAzureNet(sources.DataSource): def __init__(self, sys_cfg, distro, paths): @@ -80,6 +118,54 @@ class DataSourceAzureNet(sources.DataSource): root = sources.DataSource.__str__(self) return "%s [seed=%s]" % (root, self.seed) + def get_metadata_from_agent(self): + temp_hostname = self.metadata.get('local-hostname') + hostname_command = self.ds_cfg['hostname_bounce']['hostname_command'] + with temporary_hostname(temp_hostname, self.ds_cfg, + hostname_command=hostname_command) \ + as previous_hostname: + if (previous_hostname is not None and + util.is_true(self.ds_cfg.get('set_hostname'))): + cfg = self.ds_cfg['hostname_bounce'] + try: + perform_hostname_bounce(hostname=temp_hostname, + cfg=cfg, + prev_hostname=previous_hostname) + except Exception as e: + LOG.warn("Failed publishing hostname: %s", e) + util.logexc(LOG, "handling set_hostname failed") + + try: + invoke_agent(self.ds_cfg['agent_command']) + except util.ProcessExecutionError: + # claim the datasource even if the command failed + util.logexc(LOG, "agent command '%s' failed.", + self.ds_cfg['agent_command']) + + ddir = self.ds_cfg['data_dir'] + + fp_files = [] + key_value = None + for pk in self.cfg.get('_pubkeys', []): + if pk.get('value', None): + key_value = pk['value'] + LOG.debug("ssh authentication: using value from fabric") + else: + bname = str(pk['fingerprint'] + ".crt") + fp_files += [os.path.join(ddir, bname)] + LOG.debug("ssh authentication: " + "using fingerprint from fabirc") + + missing = util.log_time(logfunc=LOG.debug, msg="waiting for files", + func=wait_for_files, + args=(fp_files,)) + if len(missing): + LOG.warn("Did not find files, but going on: %s", missing) + + metadata = {} + metadata['public-keys'] = key_value or pubkeys_from_crt_files(fp_files) + return metadata + def get_data(self): # azure removes/ejects the cdrom containing the ovf-env.xml # file on reboot. So, in order to successfully reboot we @@ -124,77 +210,34 @@ class DataSourceAzureNet(sources.DataSource): LOG.debug("using files cached in %s", ddir) # azure / hyper-v provides random data here - seed = util.load_file("/sys/firmware/acpi/tables/OEM0", quiet=True) + seed = util.load_file("/sys/firmware/acpi/tables/OEM0", + quiet=True, decode=False) if seed: self.metadata['random_seed'] = seed # now update ds_cfg to reflect contents pass in config user_ds_cfg = util.get_cfg_by_path(self.cfg, DS_CFG_PATH, {}) self.ds_cfg = util.mergemanydict([user_ds_cfg, self.ds_cfg]) - mycfg = self.ds_cfg - ddir = mycfg['data_dir'] - - if found != ddir: - cached_ovfenv = util.load_file( - os.path.join(ddir, 'ovf-env.xml'), quiet=True) - if cached_ovfenv != files['ovf-env.xml']: - # source was not walinux-agent's datadir, so we have to clean - # up so 'wait_for_files' doesn't return early due to stale data - cleaned = [] - for f in [os.path.join(ddir, f) for f in DATA_DIR_CLEAN_LIST]: - if os.path.exists(f): - util.del_file(f) - cleaned.append(f) - if cleaned: - LOG.info("removed stale file(s) in '%s': %s", - ddir, str(cleaned)) # walinux agent writes files world readable, but expects # the directory to be protected. - write_files(ddir, files, dirmode=0700) - - # handle the hostname 'publishing' - try: - handle_set_hostname(mycfg.get('set_hostname'), - self.metadata.get('local-hostname'), - mycfg['hostname_bounce']) - except Exception as e: - LOG.warn("Failed publishing hostname: %s", e) - util.logexc(LOG, "handling set_hostname failed") - - try: - invoke_agent(mycfg['agent_command']) - except util.ProcessExecutionError: - # claim the datasource even if the command failed - util.logexc(LOG, "agent command '%s' failed.", - mycfg['agent_command']) - - shcfgxml = os.path.join(ddir, "SharedConfig.xml") - wait_for = [shcfgxml] - - fp_files = [] - for pk in self.cfg.get('_pubkeys', []): - bname = str(pk['fingerprint'] + ".crt") - fp_files += [os.path.join(ddir, bname)] - - missing = util.log_time(logfunc=LOG.debug, msg="waiting for files", - func=wait_for_files, - args=(wait_for + fp_files,)) - if len(missing): - LOG.warn("Did not find files, but going on: %s", missing) + write_files(ddir, files, dirmode=0o700) - if shcfgxml in missing: - LOG.warn("SharedConfig.xml missing, using static instance-id") + if self.ds_cfg['agent_command'] == '__builtin__': + metadata_func = get_metadata_from_fabric else: - try: - self.metadata['instance-id'] = iid_from_shared_config(shcfgxml) - except ValueError as e: - LOG.warn("failed to get instance id in %s: %s", shcfgxml, e) + metadata_func = self.get_metadata_from_agent + try: + fabric_data = metadata_func() + except Exception as exc: + LOG.info("Error communicating with Azure fabric; assume we aren't" + " on Azure.", exc_info=True) + return False - pubkeys = pubkeys_from_crt_files(fp_files) - self.metadata['public-keys'] = pubkeys + self.metadata['instance-id'] = util.read_dmi_data('system-uuid') + self.metadata.update(fabric_data) - found_ephemeral = find_ephemeral_disk() + found_ephemeral = find_fabric_formatted_ephemeral_disk() if found_ephemeral: self.ds_cfg['disk_aliases']['ephemeral0'] = found_ephemeral LOG.debug("using detected ephemeral0 of %s", found_ephemeral) @@ -216,30 +259,33 @@ def count_files(mp): return len(fnmatch.filter(os.listdir(mp), '*[!cdrom]*')) -def find_ephemeral_part(): +def find_fabric_formatted_ephemeral_part(): """ - Locate the default ephmeral0.1 device. This will be the first device - that has a LABEL of DEF_EPHEMERAL_LABEL and is a NTFS device. If Azure - gets more ephemeral devices, this logic will only identify the first - such device. + Locate the first fabric formatted ephemeral device. """ - c_label_devs = util.find_devs_with("LABEL=%s" % DEF_EPHEMERAL_LABEL) - c_fstype_devs = util.find_devs_with("TYPE=ntfs") - for dev in c_label_devs: - if dev in c_fstype_devs: - return dev + potential_locations = ['/dev/disk/cloud/azure_resource-part1', + '/dev/disk/azure/resource-part1'] + device_location = None + for potential_location in potential_locations: + if os.path.exists(potential_location): + device_location = potential_location + break + if device_location is None: + return None + ntfs_devices = util.find_devs_with("TYPE=ntfs") + real_device = os.path.realpath(device_location) + if real_device in ntfs_devices: + return device_location return None -def find_ephemeral_disk(): +def find_fabric_formatted_ephemeral_disk(): """ Get the ephemeral disk. """ - part_dev = find_ephemeral_part() - if part_dev and str(part_dev[-1]).isdigit(): - return part_dev[:-1] - elif part_dev: - return part_dev + part_dev = find_fabric_formatted_ephemeral_part() + if part_dev: + return part_dev.split('-')[0] return None @@ -253,7 +299,7 @@ def support_new_ephemeral(cfg): new ephemeral device is detected, cloud-init overrides the default frequency for both disk-setup and mounts for the current boot only. """ - device = find_ephemeral_part() + device = find_fabric_formatted_ephemeral_part() if not device: LOG.debug("no default fabric formated ephemeral0.1 found") return None @@ -298,39 +344,15 @@ def support_new_ephemeral(cfg): return mod_list -def handle_set_hostname(enabled, hostname, cfg): - if not util.is_true(enabled): - return - - if not hostname: - LOG.warn("set_hostname was true but no local-hostname") - return - - apply_hostname_bounce(hostname=hostname, policy=cfg['policy'], - interface=cfg['interface'], - command=cfg['command'], - hostname_command=cfg['hostname_command']) - - -def apply_hostname_bounce(hostname, policy, interface, command, - hostname_command="hostname"): +def perform_hostname_bounce(hostname, cfg, prev_hostname): # set the hostname to 'hostname' if it is not already set to that. # then, if policy is not off, bounce the interface using command - prev_hostname = util.subp(hostname_command, capture=True)[0].strip() - - util.subp([hostname_command, hostname]) - - msg = ("phostname=%s hostname=%s policy=%s interface=%s" % - (prev_hostname, hostname, policy, interface)) - - if util.is_false(policy): - LOG.debug("pubhname: policy false, skipping [%s]", msg) - return - - if prev_hostname == hostname and policy != "force": - LOG.debug("pubhname: no change, policy != force. skipping. [%s]", msg) - return + command = cfg['command'] + interface = cfg['interface'] + policy = cfg['policy'] + msg = ("hostname=%s policy=%s interface=%s" % + (hostname, policy, interface)) env = os.environ.copy() env['interface'] = interface env['hostname'] = hostname @@ -343,15 +365,16 @@ def apply_hostname_bounce(hostname, policy, interface, command, shell = not isinstance(command, (list, tuple)) # capture=False, see comments in bug 1202758 and bug 1206164. util.log_time(logfunc=LOG.debug, msg="publishing hostname", - get_uptime=True, func=util.subp, - kwargs={'args': command, 'shell': shell, 'capture': False, - 'env': env}) + get_uptime=True, func=util.subp, + kwargs={'args': command, 'shell': shell, 'capture': False, + 'env': env}) -def crtfile_to_pubkey(fname): +def crtfile_to_pubkey(fname, data=None): pipeline = ('openssl x509 -noout -pubkey < "$0" |' 'ssh-keygen -i -m PKCS8 -f /dev/stdin') - (out, _err) = util.subp(['sh', '-c', pipeline, fname], capture=True) + (out, _err) = util.subp(['sh', '-c', pipeline, fname], + capture=True, data=data) return out.rstrip() @@ -383,14 +406,30 @@ def wait_for_files(flist, maxwait=60, naplen=.5): def write_files(datadir, files, dirmode=None): + + def _redact_password(cnt, fname): + """Azure provides the UserPassword in plain text. So we redact it""" + try: + root = ET.fromstring(cnt) + for elem in root.iter(): + if ('UserPassword' in elem.tag and + elem.text != DEF_PASSWD_REDACTION): + elem.text = DEF_PASSWD_REDACTION + return ET.tostring(root) + except Exception: + LOG.critical("failed to redact userpassword in {}".format(fname)) + return cnt + if not datadir: return if not files: files = {} util.ensure_dir(datadir, dirmode) for (name, content) in files.items(): - util.write_file(filename=os.path.join(datadir, name), - content=content, mode=0600) + fname = os.path.join(datadir, name) + if 'ovf-env.xml' in name: + content = _redact_password(content, fname) + util.write_file(filename=fname, content=content, mode=0o600) def invoke_agent(cmd): @@ -441,7 +480,8 @@ def load_azure_ovf_pubkeys(sshnode): for pk_node in pubkeys: if not pk_node.hasChildNodes(): continue - cur = {'fingerprint': "", 'path': ""} + + cur = {'fingerprint': "", 'path': "", 'value': ""} for child in pk_node.childNodes: if child.nodeType == text_node or not child.localName: continue @@ -461,20 +501,6 @@ def load_azure_ovf_pubkeys(sshnode): return found -def single_node_at_path(node, pathlist): - curnode = node - for tok in pathlist: - results = find_child(curnode, lambda n: n.localName == tok) - if len(results) == 0: - raise ValueError("missing %s token in %s" % (tok, str(pathlist))) - if len(results) > 1: - raise ValueError("found %s nodes of type %s looking for %s" % - (len(results), tok, str(pathlist))) - curnode = results[0] - - return curnode - - def read_azure_ovf(contents): try: dom = minidom.parseString(contents) @@ -482,7 +508,7 @@ def read_azure_ovf(contents): raise BrokenAzureDataSource("invalid xml: %s" % e) results = find_child(dom.documentElement, - lambda n: n.localName == "ProvisioningSection") + lambda n: n.localName == "ProvisioningSection") if len(results) == 0: raise NonAzureDataSource("No ProvisioningSection") @@ -492,7 +518,8 @@ def read_azure_ovf(contents): provSection = results[0] lpcs_nodes = find_child(provSection, - lambda n: n.localName == "LinuxProvisioningConfigurationSet") + lambda n: + n.localName == "LinuxProvisioningConfigurationSet") if len(results) == 0: raise NonAzureDataSource("No LinuxProvisioningConfigurationSet") @@ -559,7 +586,7 @@ def read_azure_ovf(contents): defuser = {} if username: defuser['name'] = username - if password: + if password and DEF_PASSWD_REDACTION != password: defuser['passwd'] = encrypt_pass(password) defuser['lock_passwd'] = False @@ -592,32 +619,13 @@ def load_azure_ds_dir(source_dir): if not os.path.isfile(ovf_file): raise NonAzureDataSource("No ovf-env file found") - with open(ovf_file, "r") as fp: + with open(ovf_file, "rb") as fp: contents = fp.read() md, ud, cfg = read_azure_ovf(contents) return (md, ud, cfg, {'ovf-env.xml': contents}) -def iid_from_shared_config(path): - with open(path, "rb") as fp: - content = fp.read() - return iid_from_shared_config_content(content) - - -def iid_from_shared_config_content(content): - """ - find INSTANCE_ID in: - <?xml version="1.0" encoding="utf-8"?> - <SharedConfig version="1.0.0.0" goalStateIncarnation="1"> - <Deployment name="INSTANCE_ID" guid="{...}" incarnation="0"> - <Service name="..." guid="{00000000-0000-0000-0000-000000000000}" /> - """ - dom = minidom.parseString(content) - depnode = single_node_at_path(dom, ["SharedConfig", "Deployment"]) - return depnode.attributes.get('name').value - - class BrokenAzureDataSource(Exception): pass @@ -628,7 +636,7 @@ class NonAzureDataSource(Exception): # Used to match classes to dependencies datasources = [ - (DataSourceAzureNet, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)), + (DataSourceAzureNet, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)), ] |