author     zsdc <taras@vyos.io>    2020-12-25 17:52:03 +0200
committer  zsdc <taras@vyos.io>    2020-12-25 17:52:03 +0200
commit     526c2760b85ff625a10f4a1c9ba83759d8de1441
tree       c7a87d78a7944dd6a9d02544d1605f3a7d77e313 /cloudinit
parent     66dc53b1b3f8786f3bbb25e914c1dc8161af0494
parent     47f4229ebcef9f83df8b549bb869a2dbf6dff17c
T2117: Cloud-init updated to 20.4
Merged with the 20.4 tag from the upstream Cloud-init repository
Diffstat (limited to 'cloudinit')
78 files changed, 2875 insertions, 697 deletions
diff --git a/cloudinit/config/cc_disk_setup.py b/cloudinit/config/cc_disk_setup.py index a7bdc703..d1200694 100644 --- a/cloudinit/config/cc_disk_setup.py +++ b/cloudinit/config/cc_disk_setup.py @@ -35,7 +35,7 @@ either a size or a list containing a size and the numerical value for a partition type. The size for partitions is specified in **percentage** of disk space, not in bytes (e.g. a size of 33 would take up 1/3 of the disk space). The ``overwrite`` option controls whether this module tries to be safe about -writing partition talbes or not. If ``overwrite: false`` is set, the device +writing partition tables or not. If ``overwrite: false`` is set, the device will be checked for a partition table and for a file system and if either is found, the operation will be skipped. If ``overwrite: true`` is set, no checks will be performed. diff --git a/cloudinit/config/cc_growpart.py b/cloudinit/config/cc_growpart.py index 237c3d02..9f338ad1 100644 --- a/cloudinit/config/cc_growpart.py +++ b/cloudinit/config/cc_growpart.py @@ -16,12 +16,13 @@ This is useful for cloud instances with a larger amount of disk space available than the pristine image uses, as it allows the instance to automatically make use of the extra space. -The devices run growpart on are specified as a list under the ``devices`` key. -Each entry in the devices list can be either the path to the device's -mountpoint in the filesystem or a path to the block device in ``/dev``. +The devices on which to run growpart are specified as a list under the +``devices`` key. Each entry in the devices list can be either the path to the +device's mountpoint in the filesystem or a path to the block device in +``/dev``. The utility to use for resizing can be selected using the ``mode`` config key. -If ``mode`` key is set to ``auto``, then any available utility (either +If the ``mode`` key is set to ``auto``, then any available utility (either ``growpart`` or BSD ``gpart``) will be used. If neither utility is available, no error will be raised. If ``mode`` is set to ``growpart``, then the ``growpart`` utility will be used. If this utility is not available on the @@ -34,7 +35,7 @@ where one tool is able to function and the other is not. The default configuration for both should work for most cloud instances. To explicitly prevent ``cloud-initramfs-tools`` from running ``growroot``, the file ``/etc/growroot-disabled`` can be created. By default, both ``growroot`` and -``cc_growpart`` will check for the existance of this file and will not run if +``cc_growpart`` will check for the existence of this file and will not run if it is present. However, this file can be ignored for ``cc_growpart`` by setting ``ignore_growroot_disabled`` to ``true``. For more information on ``cloud-initramfs-tools`` see: https://launchpad.net/cloud-initramfs-tools @@ -196,10 +197,6 @@ class ResizeGpart(object): util.logexc(LOG, "Failed: gpart resize -i %s %s", partnum, diskdev) raise ResizeFailedException(e) from e - # Since growing the FS requires a reboot, make sure we reboot - # first when this module has finished. - open('/var/run/reboot-required', 'a').close() - return (before, get_size(partdev)) diff --git a/cloudinit/config/cc_lxd.py b/cloudinit/config/cc_lxd.py index 7129c9c6..486037d9 100644 --- a/cloudinit/config/cc_lxd.py +++ b/cloudinit/config/cc_lxd.py @@ -283,14 +283,18 @@ def maybe_cleanup_default(net_name, did_init, create, attach, fail_assume_enoent = "failed. Assuming it did not exist." succeeded = "succeeded." 
if create: - msg = "Deletion of lxd network '%s' %s" + msg = "Detach of lxd network '%s' from profile '%s' %s" try: - _lxc(["network", "delete", net_name]) - LOG.debug(msg, net_name, succeeded) + _lxc(["network", "detach-profile", net_name, profile]) + LOG.debug(msg, net_name, profile, succeeded) except subp.ProcessExecutionError as e: if e.exit_code != 1: raise e - LOG.debug(msg, net_name, fail_assume_enoent) + LOG.debug(msg, net_name, profile, fail_assume_enoent) + else: + msg = "Deletion of lxd network '%s' %s" + _lxc(["network", "delete", net_name]) + LOG.debug(msg, net_name, succeeded) if attach: msg = "Removal of device '%s' from profile '%s' %s" diff --git a/cloudinit/config/cc_mounts.py b/cloudinit/config/cc_mounts.py index 54f2f878..c22d1698 100644 --- a/cloudinit/config/cc_mounts.py +++ b/cloudinit/config/cc_mounts.py @@ -255,8 +255,9 @@ def create_swapfile(fname: str, size: str) -> None: try: subp.subp(cmd, capture=True) except subp.ProcessExecutionError as e: - LOG.warning(errmsg, fname, size, method, e) + LOG.info(errmsg, fname, size, method, e) util.del_file(fname) + raise swap_dir = os.path.dirname(fname) util.ensure_dir(swap_dir) @@ -269,9 +270,8 @@ def create_swapfile(fname: str, size: str) -> None: else: try: create_swap(fname, size, "fallocate") - except subp.ProcessExecutionError as e: - LOG.warning(errmsg, fname, size, "dd", e) - LOG.warning("Will attempt with dd.") + except subp.ProcessExecutionError: + LOG.info("fallocate swap creation failed, will attempt with dd") create_swap(fname, size, "dd") if os.path.exists(fname): diff --git a/cloudinit/config/cc_ntp.py b/cloudinit/config/cc_ntp.py index 3d7279d6..e183993f 100644 --- a/cloudinit/config/cc_ntp.py +++ b/cloudinit/config/cc_ntp.py @@ -80,6 +80,14 @@ DISTRO_CLIENT_CONFIG = { 'confpath': '/etc/chrony/chrony.conf', }, }, + 'rhel': { + 'ntp': { + 'service_name': 'ntpd', + }, + 'chrony': { + 'service_name': 'chronyd', + }, + }, 'opensuse': { 'chrony': { 'service_name': 'chronyd', diff --git a/cloudinit/config/cc_power_state_change.py b/cloudinit/config/cc_power_state_change.py index 6fcb8a7d..5780a7e9 100644 --- a/cloudinit/config/cc_power_state_change.py +++ b/cloudinit/config/cc_power_state_change.py @@ -22,7 +22,7 @@ The ``delay`` key specifies a duration to be added onto any shutdown command used. Therefore, if a 5 minute delay and a 120 second shutdown are specified, the maximum amount of time between cloud-init starting and the system shutting down is 7 minutes, and the minimum amount of time is 5 minutes. The ``delay`` -key must have an argument in either the form ``+5`` for 5 minutes or ``now`` +key must have an argument in either the form ``'+5'`` for 5 minutes or ``now`` for immediate shutdown. Optionally, a command can be run to determine whether or not @@ -117,7 +117,7 @@ def check_condition(cond, log=None): def handle(_name, cfg, cloud, log, _args): try: - (args, timeout, condition) = load_power_state(cfg, cloud.distro.name) + (args, timeout, condition) = load_power_state(cfg, cloud.distro) if args is None: log.debug("no power_state provided. 
doing nothing") return @@ -144,19 +144,7 @@ def handle(_name, cfg, cloud, log, _args): condition, execmd, [args, devnull_fp]) -def convert_delay(delay, fmt=None, scale=None): - if not fmt: - fmt = "+%s" - if not scale: - scale = 1 - - if delay != "now": - delay = fmt % int(int(delay) * int(scale)) - - return delay - - -def load_power_state(cfg, distro_name): +def load_power_state(cfg, distro): # returns a tuple of shutdown_command, timeout # shutdown_command is None if no config found pstate = cfg.get('power_state') @@ -167,44 +155,16 @@ def load_power_state(cfg, distro_name): if not isinstance(pstate, dict): raise TypeError("power_state is not a dict.") - opt_map = {'halt': '-H', 'poweroff': '-P', 'reboot': '-r'} - + modes_ok = ['halt', 'poweroff', 'reboot'] mode = pstate.get("mode") - if mode not in opt_map: + if mode not in distro.shutdown_options_map: raise TypeError( "power_state[mode] required, must be one of: %s. found: '%s'." % - (','.join(opt_map.keys()), mode)) - - delay = pstate.get("delay", "now") - message = pstate.get("message") - scale = 1 - fmt = "+%s" - command = ["shutdown", opt_map[mode]] - - if distro_name == 'alpine': - # Convert integer 30 or string '30' to '1800' (seconds) as Alpine's - # halt/poweroff/reboot commands take seconds rather than minutes. - scale = 60 - # No "+" in front of delay value as not supported by Alpine's commands. - fmt = "%s" - if delay == "now": - # Alpine's commands do not understand "now". - delay = "0" - command = [mode, "-d"] - # Alpine's commands don't support a message. - message = None - - try: - delay = convert_delay(delay, fmt=fmt, scale=scale) - except ValueError as e: - raise TypeError( - "power_state[delay] must be 'now' or '+m' (minutes)." - " found '%s'." % delay - ) from e + (','.join(modes_ok), mode)) - args = command + [delay] - if message: - args.append(message) + args = distro.shutdown_command(mode=mode, + delay=pstate.get("delay", "now"), + message=pstate.get("message")) try: timeout = float(pstate.get('timeout', 30.0)) diff --git a/cloudinit/config/cc_refresh_rmc_and_interface.py b/cloudinit/config/cc_refresh_rmc_and_interface.py new file mode 100644 index 00000000..146758ad --- /dev/null +++ b/cloudinit/config/cc_refresh_rmc_and_interface.py @@ -0,0 +1,159 @@ +# (c) Copyright IBM Corp. 2020 All Rights Reserved +# +# Author: Aman Kumar Sinha <amansi26@in.ibm.com> +# +# This file is part of cloud-init. See LICENSE file for license information. + +""" +Refresh IPv6 interface and RMC +------------------------------ +**Summary:** Ensure Network Manager is not managing IPv6 interface + +This module is IBM PowerVM Hypervisor specific + +Reliable Scalable Cluster Technology (RSCT) is a set of software components +that together provide a comprehensive clustering environment(RAS features) +for IBM PowerVM based virtual machines. RSCT includes the Resource +Monitoring and Control (RMC) subsystem. RMC is a generalized framework used +for managing, monitoring, and manipulating resources. RMC runs as a daemon +process on individual machines and needs creation of unique node id and +restarts during VM boot. +More details refer +https://www.ibm.com/support/knowledgecenter/en/SGVKBA_3.2/admin/bl503_ovrv.htm + +This module handles +- Refreshing RMC +- Disabling NetworkManager from handling IPv6 interface, as IPv6 interface + is used for communication between RMC daemon and PowerVM hypervisor. 
+ +**Internal name:** ``cc_refresh_rmc_and_interface`` + +**Module frequency:** per always + +**Supported distros:** RHEL + +""" + +from cloudinit import log as logging +from cloudinit.settings import PER_ALWAYS +from cloudinit import util +from cloudinit import subp +from cloudinit import netinfo + +import errno + +frequency = PER_ALWAYS + +LOG = logging.getLogger(__name__) +# Ensure that /opt/rsct/bin has been added to standard PATH of the +# distro. The symlink to rmcctrl is /usr/sbin/rsct/bin/rmcctrl . +RMCCTRL = 'rmcctrl' + + +def handle(name, _cfg, _cloud, _log, _args): + if not subp.which(RMCCTRL): + LOG.debug("No '%s' in path, disabled", RMCCTRL) + return + + LOG.debug( + 'Making the IPv6 up explicitly. ' + 'Ensuring IPv6 interface is not being handled by NetworkManager ' + 'and it is restarted to re-establish the communication with ' + 'the hypervisor') + + ifaces = find_ipv6_ifaces() + + # Setting NM_CONTROLLED=no for IPv6 interface + # making it down and up + + if len(ifaces) == 0: + LOG.debug("Did not find any interfaces with ipv6 addresses.") + else: + for iface in ifaces: + refresh_ipv6(iface) + disable_ipv6(sysconfig_path(iface)) + restart_network_manager() + + +def find_ipv6_ifaces(): + info = netinfo.netdev_info() + ifaces = [] + for iface, data in info.items(): + if iface == "lo": + LOG.debug('Skipping localhost interface') + if len(data.get("ipv4", [])) != 0: + # skip this interface, as it has ipv4 addrs + continue + ifaces.append(iface) + return ifaces + + +def refresh_ipv6(interface): + # IPv6 interface is explicitly brought up, subsequent to which the + # RMC services are restarted to re-establish the communication with + # the hypervisor. + subp.subp(['ip', 'link', 'set', interface, 'down']) + subp.subp(['ip', 'link', 'set', interface, 'up']) + + +def sysconfig_path(iface): + return '/etc/sysconfig/network-scripts/ifcfg-' + iface + + +def restart_network_manager(): + subp.subp(['systemctl', 'restart', 'NetworkManager']) + + +def disable_ipv6(iface_file): + # Ensuring that the communication b/w the hypervisor and VM is not + # interrupted due to NetworkManager. For this purpose, as part of + # this function, the NM_CONTROLLED is explicitly set to No for IPV6 + # interface and NetworkManager is restarted. + try: + contents = util.load_file(iface_file) + except IOError as e: + if e.errno == errno.ENOENT: + LOG.debug("IPv6 interface file %s does not exist\n", + iface_file) + else: + raise e + + if 'IPV6INIT' not in contents: + LOG.debug("Interface file %s did not have IPV6INIT", iface_file) + return + + LOG.debug("Editing interface file %s ", iface_file) + + # Dropping any NM_CONTROLLED or IPV6 lines from IPv6 interface file. + lines = contents.splitlines() + lines = [line for line in lines if not search(line)] + lines.append("NM_CONTROLLED=no") + + with open(iface_file, "w") as fp: + fp.write("\n".join(lines) + "\n") + + +def search(contents): + # Search for any NM_CONTROLLED or IPV6 lines in IPv6 interface file. + return( + contents.startswith("IPV6ADDR") or + contents.startswith("IPADDR6") or + contents.startswith("IPV6INIT") or + contents.startswith("NM_CONTROLLED")) + + +def refresh_rmc(): + # To make a healthy connection between RMC daemon and hypervisor we + # refresh RMC. With refreshing RMC we are ensuring that making IPv6 + # down and up shouldn't impact communication between RMC daemon and + # hypervisor. 
+ # -z : stop Resource Monitoring & Control subsystem and all resource + # managers, but the command does not return control to the user + # until the subsystem and all resource managers are stopped. + # -s : start Resource Monitoring & Control subsystem. + try: + subp.subp([RMCCTRL, '-z']) + subp.subp([RMCCTRL, '-s']) + except Exception: + util.logexc(LOG, 'Failed to refresh the RMC subsystem.') + raise diff --git a/cloudinit/config/cc_reset_rmc.py b/cloudinit/config/cc_reset_rmc.py new file mode 100644 index 00000000..1cd72774 --- /dev/null +++ b/cloudinit/config/cc_reset_rmc.py @@ -0,0 +1,143 @@ +# (c) Copyright IBM Corp. 2020 All Rights Reserved +# +# Author: Aman Kumar Sinha <amansi26@in.ibm.com> +# +# This file is part of cloud-init. See LICENSE file for license information. + + +""" +Reset RMC +------------ +**Summary:** reset rsct node id + +Reset RMC module is IBM PowerVM Hypervisor specific + +Reliable Scalable Cluster Technology (RSCT) is a set of software components, +that together provide a comprehensive clustering environment (RAS features) +for IBM PowerVM based virtual machines. RSCT includes the Resource monitoring +and control (RMC) subsystem. RMC is a generalized framework used for managing, +monitoring, and manipulating resources. RMC runs as a daemon process on +individual machines and needs creation of unique node id and restarts +during VM boot. +More details refer +https://www.ibm.com/support/knowledgecenter/en/SGVKBA_3.2/admin/bl503_ovrv.htm + +This module handles +- creation of the unique RSCT node id to every instance/virtual machine + and ensure once set, it isn't changed subsequently by cloud-init. + In order to do so, it restarts RSCT service. + +Prerequisite of using this module is to install RSCT packages. + +**Internal name:** ``cc_reset_rmc`` + +**Module frequency:** per instance + +**Supported distros:** rhel, sles and ubuntu + +""" +import os + +from cloudinit import log as logging +from cloudinit.settings import PER_INSTANCE +from cloudinit import util +from cloudinit import subp + +frequency = PER_INSTANCE + +# RMCCTRL is expected to be in system PATH (/opt/rsct/bin) +# The symlink for RMCCTRL and RECFGCT are +# /usr/sbin/rsct/bin/rmcctrl and +# /usr/sbin/rsct/install/bin/recfgct respectively. +RSCT_PATH = '/opt/rsct/install/bin' +RMCCTRL = 'rmcctrl' +RECFGCT = 'recfgct' + +LOG = logging.getLogger(__name__) + +NODE_ID_FILE = '/etc/ct_node_id' + + +def handle(name, _cfg, cloud, _log, _args): + # Ensuring node id has to be generated only once during first boot + if cloud.datasource.platform_type == 'none': + LOG.debug('Skipping creation of new ct_node_id node') + return + + if not os.path.isdir(RSCT_PATH): + LOG.debug("module disabled, RSCT_PATH not present") + return + + orig_path = os.environ.get('PATH') + try: + add_path(orig_path) + reset_rmc() + finally: + if orig_path: + os.environ['PATH'] = orig_path + else: + del os.environ['PATH'] + + +def reconfigure_rsct_subsystems(): + # Reconfigure the RSCT subsystems, which includes removing all RSCT data + # under the /var/ct directory, generating a new node ID, and making it + # appear as if the RSCT components were just installed + try: + out = subp.subp([RECFGCT])[0] + LOG.debug(out.strip()) + return out + except subp.ProcessExecutionError: + util.logexc(LOG, 'Failed to reconfigure the RSCT subsystems.') + raise + + +def get_node_id(): + try: + fp = util.load_file(NODE_ID_FILE) + node_id = fp.split('\n')[0] + return node_id + except Exception: + util.logexc(LOG, 'Failed to get node ID from file %s.' 
% NODE_ID_FILE) + raise + + +def add_path(orig_path): + # Adding the RSCT_PATH to env standard path + # So thet cloud init automatically find and + # run RECFGCT to create new node_id. + suff = ":" + orig_path if orig_path else "" + os.environ['PATH'] = RSCT_PATH + suff + return os.environ['PATH'] + + +def rmcctrl(): + # Stop the RMC subsystem and all resource managers so that we can make + # some changes to it + try: + return subp.subp([RMCCTRL, '-z']) + except Exception: + util.logexc(LOG, 'Failed to stop the RMC subsystem.') + raise + + +def reset_rmc(): + LOG.debug('Attempting to reset RMC.') + + node_id_before = get_node_id() + LOG.debug('Node ID at beginning of module: %s', node_id_before) + + # Stop the RMC subsystem and all resource managers so that we can make + # some changes to it + rmcctrl() + reconfigure_rsct_subsystems() + + node_id_after = get_node_id() + LOG.debug('Node ID at end of module: %s', node_id_after) + + # Check if new node ID is generated or not + # by comparing old and new node ID + if node_id_after == node_id_before: + msg = 'New node ID did not get generated.' + LOG.error(msg) + raise Exception(msg) diff --git a/cloudinit/config/cc_resizefs.py b/cloudinit/config/cc_resizefs.py index 978d2ee0..9afbb847 100644 --- a/cloudinit/config/cc_resizefs.py +++ b/cloudinit/config/cc_resizefs.py @@ -9,10 +9,7 @@ """Resizefs: cloud-config module which resizes the filesystem""" import errno -import getopt import os -import re -import shlex import stat from textwrap import dedent @@ -88,56 +85,23 @@ def _resize_zfs(mount_point, devpth): return ('zpool', 'online', '-e', mount_point, devpth) -def _get_dumpfs_output(mount_point): - return subp.subp(['dumpfs', '-m', mount_point])[0] - - -def _get_gpart_output(part): - return subp.subp(['gpart', 'show', part])[0] - - def _can_skip_resize_ufs(mount_point, devpth): - # extract the current fs sector size - """ - # dumpfs -m / - # newfs command for / (/dev/label/rootfs) - newfs -L rootf -O 2 -U -a 4 -b 32768 -d 32768 -e 4096 -f 4096 -g 16384 - -h 64 -i 8192 -j -k 6408 -m 8 -o time -s 58719232 /dev/label/rootf - """ - cur_fs_sz = None - frag_sz = None - dumpfs_res = _get_dumpfs_output(mount_point) - for line in dumpfs_res.splitlines(): - if not line.startswith('#'): - newfs_cmd = shlex.split(line) - opt_value = 'O:Ua:s:b:d:e:f:g:h:i:jk:m:o:L:' - optlist, _args = getopt.getopt(newfs_cmd[1:], opt_value) - for o, a in optlist: - if o == "-s": - cur_fs_sz = int(a) - if o == "-f": - frag_sz = int(a) - # check the current partition size - # Example output from `gpart show /dev/da0`: - # => 40 62914480 da0 GPT (30G) - # 40 1024 1 freebsd-boot (512K) - # 1064 58719232 2 freebsd-ufs (28G) - # 58720296 3145728 3 freebsd-swap (1.5G) - # 61866024 1048496 - free - (512M) - expect_sz = None - m = re.search('^(/dev/.+)p([0-9])$', devpth) - gpart_res = _get_gpart_output(m.group(1)) - for line in gpart_res.splitlines(): - if re.search(r"freebsd-ufs", line): - fields = line.split() - expect_sz = int(fields[1]) - # Normalize the gpart sector size, - # because the size is not exactly the same as fs size. 
- normal_expect_sz = (expect_sz - expect_sz % (frag_sz / 512)) - if normal_expect_sz == cur_fs_sz: - return True - else: - return False + # possible errors cases on the code-path to growfs -N following: + # https://github.com/freebsd/freebsd/blob/HEAD/sbin/growfs/growfs.c + # This is the "good" error: + skip_start = "growfs: requested size" + skip_contain = "is not larger than the current filesystem size" + # growfs exits with 1 for almost all cases up to this one. + # This means we can't just use rcs=[0, 1] as subp parameter: + try: + subp.subp(['growfs', '-N', devpth]) + except subp.ProcessExecutionError as e: + if e.stderr.startswith(skip_start) and skip_contain in e.stderr: + # This FS is already at the desired size + return True + else: + raise e + return False # Do not use a dictionary as these commands should be able to be used diff --git a/cloudinit/config/cc_resolv_conf.py b/cloudinit/config/cc_resolv_conf.py index 519e66eb..7beb11ca 100644 --- a/cloudinit/config/cc_resolv_conf.py +++ b/cloudinit/config/cc_resolv_conf.py @@ -14,7 +14,7 @@ Resolv Conf This module is intended to manage resolv.conf in environments where early configuration of resolv.conf is necessary for further bootstrapping and/or where configuration management such as puppet or chef own dns configuration. -As Debian/Ubuntu will, by default, utilize resovlconf, and similarly RedHat +As Debian/Ubuntu will, by default, utilize resolvconf, and similarly RedHat will use sysconfig, this module is likely to be of little use unless those are configured correctly. diff --git a/cloudinit/config/cc_ssh.py b/cloudinit/config/cc_ssh.py index 9b2a333a..05a16dbc 100755 --- a/cloudinit/config/cc_ssh.py +++ b/cloudinit/config/cc_ssh.py @@ -83,8 +83,9 @@ enabled by default. Host keys can be added using the ``ssh_keys`` configuration key. The argument to this config key should be a dictionary entries for the public and private keys of each desired key type. Entries in the ``ssh_keys`` config dict should -have keys in the format ``<key type>_private`` and ``<key type>_public``, -e.g. ``rsa_private: <key>`` and ``rsa_public: <key>``. See below for supported +have keys in the format ``<key type>_private``, ``<key type>_public``, and, +optionally, ``<key type>_certificate``, e.g. ``rsa_private: <key>``, +``rsa_public: <key>``, and ``rsa_certificate: <key>``. See below for supported key types. Not all key types have to be specified, ones left unspecified will not be used. If this config option is used, then no keys will be generated. @@ -94,7 +95,8 @@ not be used. If this config option is used, then no keys will be generated. secure .. note:: - to specify multiline private host keys, use yaml multiline syntax + to specify multiline private host keys and certificates, use yaml + multiline syntax If no host keys are specified using ``ssh_keys``, then keys will be generated using ``ssh-keygen``. By default one public/private pair of each supported @@ -128,12 +130,17 @@ config flags are: ... -----END RSA PRIVATE KEY----- rsa_public: ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAGEAoPRhIfLvedSDKw7Xd ... + rsa_certificate: | + ssh-rsa-cert-v01@openssh.com AAAAIHNzaC1lZDI1NTE5LWNlcnQt ... dsa_private: | -----BEGIN DSA PRIVATE KEY----- MIIBxwIBAAJhAKD0YSHy73nUgysO13XsJmd4fHiFyQ+00R7VVu2iV9Qco ... -----END DSA PRIVATE KEY----- dsa_public: ssh-dsa AAAAB3NzaC1yc2EAAAABIwAAAGEAoPRhIfLvedSDKw7Xd ... + dsa_certificate: | + ssh-dsa-cert-v01@openssh.com AAAAIHNzaC1lZDI1NTE5LWNlcnQt ... 
+ ssh_genkeytypes: <key type> disable_root: <true/false> disable_root_opts: <disable root options string> @@ -169,6 +176,8 @@ for k in GENERATE_KEY_NAMES: CONFIG_KEY_TO_FILE.update({"%s_private" % k: (KEY_FILE_TPL % k, 0o600)}) CONFIG_KEY_TO_FILE.update( {"%s_public" % k: (KEY_FILE_TPL % k + ".pub", 0o600)}) + CONFIG_KEY_TO_FILE.update( + {"%s_certificate" % k: (KEY_FILE_TPL % k + "-cert.pub", 0o600)}) PRIV_TO_PUB["%s_private" % k] = "%s_public" % k KEY_GEN_TPL = 'o=$(ssh-keygen -yf "%s") && echo "$o" root@localhost > "%s"' @@ -186,12 +195,18 @@ def handle(_name, cfg, cloud, log, _args): util.logexc(log, "Failed deleting key file %s", f) if "ssh_keys" in cfg: - # if there are keys in cloud-config, use them + # if there are keys and/or certificates in cloud-config, use them for (key, val) in cfg["ssh_keys"].items(): - if key in CONFIG_KEY_TO_FILE: - tgt_fn = CONFIG_KEY_TO_FILE[key][0] - tgt_perms = CONFIG_KEY_TO_FILE[key][1] - util.write_file(tgt_fn, val, tgt_perms) + # skip entry if unrecognized + if key not in CONFIG_KEY_TO_FILE: + continue + tgt_fn = CONFIG_KEY_TO_FILE[key][0] + tgt_perms = CONFIG_KEY_TO_FILE[key][1] + util.write_file(tgt_fn, val, tgt_perms) + # set server to present the most recently identified certificate + if '_certificate' in key: + cert_config = {'HostCertificate': tgt_fn} + ssh_util.update_ssh_config(cert_config) for (priv, pub) in PRIV_TO_PUB.items(): if pub in cfg['ssh_keys'] or priv not in cfg['ssh_keys']: diff --git a/cloudinit/config/cc_users_groups.py b/cloudinit/config/cc_users_groups.py index 426498a3..ac4a4410 100644 --- a/cloudinit/config/cc_users_groups.py +++ b/cloudinit/config/cc_users_groups.py @@ -26,13 +26,14 @@ entry of the ``users`` list. Each entry in the ``users`` list, other than a config keys for an entry in ``users`` are as follows: - ``name``: The user's login name - - ``expiredate``: Optional. Date on which the user's login will be + - ``expiredate``: Optional. Date on which the user's account will be disabled. Default: none - ``gecos``: Optional. Comment about the user, usually a comma-separated string of real name and contact information. Default: none - ``groups``: Optional. Additional groups to add the user to. Default: none - ``homedir``: Optional. Home dir for user. Default is ``/home/<username>`` - - ``inactive``: Optional. Mark user inactive. Default: false + - ``inactive``: Optional. Number of days after a password expires until + the account is permanently disabled. Default: none - ``lock_passwd``: Optional. Disable password login. Default: true - ``no_create_home``: Optional. Do not create home directory. Default: false @@ -80,10 +81,9 @@ config keys for an entry in ``users`` are as follows: .. note:: Most of these configuration options will not be honored if the user - already exists. Following options are the exceptions and they are - applicable on already-existing users: - - 'plain_text_passwd', 'hashed_passwd', 'lock_passwd', 'sudo', - 'ssh_authorized_keys', 'ssh_redirect_user'. + already exists. The following options are the exceptions; they are applied + to already-existing users: ``plain_text_passwd``, ``hashed_passwd``, + ``lock_passwd``, ``sudo``, ``ssh_authorized_keys``, ``ssh_redirect_user``. 
**Internal name:** ``cc_users_groups`` @@ -103,11 +103,11 @@ config keys for an entry in ``users`` are as follows: - name: <some_restricted_user> sudo: false - name: <username> - expiredate: <date> + expiredate: '<date>' gecos: <comment> groups: <additional groups> homedir: <home directory> - inactive: <true/false> + inactive: '<number of days>' lock_passwd: <true/false> no_create_home: <true/false> no_log_init: <true/false> diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py index 8a966aee..456bab2c 100644 --- a/cloudinit/config/schema.py +++ b/cloudinit/config/schema.py @@ -1,6 +1,7 @@ # This file is part of cloud-init. See LICENSE file for license information. """schema.py: Set of module functions for processing cloud-config schema.""" +from cloudinit.cmd.devel import read_cfg_paths from cloudinit import importer from cloudinit.util import find_modules, load_file @@ -173,7 +174,8 @@ def annotated_cloudconfig_file(cloudconfig, original_content, schema_errors): def validate_cloudconfig_file(config_path, schema, annotate=False): """Validate cloudconfig file adheres to a specific jsonschema. - @param config_path: Path to the yaml cloud-config file to parse. + @param config_path: Path to the yaml cloud-config file to parse, or None + to default to system userdata from Paths object. @param schema: Dict describing a valid jsonschema to validate against. @param annotate: Boolean set True to print original config file with error annotations on the offending lines. @@ -181,9 +183,24 @@ def validate_cloudconfig_file(config_path, schema, annotate=False): @raises SchemaValidationError containing any of schema_errors encountered. @raises RuntimeError when config_path does not exist. """ - if not os.path.exists(config_path): - raise RuntimeError('Configfile {0} does not exist'.format(config_path)) - content = load_file(config_path, decode=False) + if config_path is None: + # Use system's raw userdata path + if os.getuid() != 0: + raise RuntimeError( + "Unable to read system userdata as non-root user." + " Try using sudo" + ) + paths = read_cfg_paths() + user_data_file = paths.get_ipath_cur("userdata_raw") + content = load_file(user_data_file, decode=False) + else: + if not os.path.exists(config_path): + raise RuntimeError( + 'Configfile {0} does not exist'.format( + config_path + ) + ) + content = load_file(config_path, decode=False) if not content.startswith(CLOUD_CONFIG_HEADER): errors = ( ('format-l1.c1', 'File {0} needs to begin with "{1}"'.format( @@ -425,6 +442,8 @@ def get_parser(parser=None): description='Validate cloud-config files or document schema') parser.add_argument('-c', '--config-file', help='Path of the cloud-config yaml file to validate') + parser.add_argument('--system', action='store_true', default=False, + help='Validate the system cloud-config userdata') parser.add_argument('-d', '--docs', nargs='+', help=('Print schema module docs. 
Choices: all or' ' space-delimited cc_names.')) @@ -435,11 +454,11 @@ def get_parser(parser=None): def handle_schema_args(name, args): """Handle provided schema args and perform the appropriate actions.""" - exclusive_args = [args.config_file, args.docs] - if not any(exclusive_args) or all(exclusive_args): - error('Expected either --config-file argument or --docs') + exclusive_args = [args.config_file, args.docs, args.system] + if len([arg for arg in exclusive_args if arg]) != 1: + error('Expected one of --config-file, --system or --docs arguments') full_schema = get_schema() - if args.config_file: + if args.config_file or args.system: try: validate_cloudconfig_file( args.config_file, full_schema, args.annotate) @@ -449,7 +468,11 @@ def handle_schema_args(name, args): except RuntimeError as e: error(str(e)) else: - print("Valid cloud-config file {0}".format(args.config_file)) + if args.config_file is None: + cfg_name = "system userdata" + else: + cfg_name = args.config_file + print("Valid cloud-config:", cfg_name) elif args.docs: schema_ids = [subschema['id'] for subschema in full_schema['allOf']] schema_ids += ['all'] diff --git a/cloudinit/config/tests/test_mounts.py b/cloudinit/config/tests/test_mounts.py index 764a33e3..56510fd6 100644 --- a/cloudinit/config/tests/test_mounts.py +++ b/cloudinit/config/tests/test_mounts.py @@ -4,6 +4,7 @@ from unittest import mock import pytest from cloudinit.config.cc_mounts import create_swapfile +from cloudinit.subp import ProcessExecutionError M_PATH = 'cloudinit.config.cc_mounts.' @@ -26,3 +27,35 @@ class TestCreateSwapfile: create_swapfile(fname, '') assert mock.call(['mkswap', fname]) in m_subp.call_args_list + + @mock.patch(M_PATH + "util.get_mount_info") + @mock.patch(M_PATH + "subp.subp") + def test_fallback_from_fallocate_to_dd( + self, m_subp, m_get_mount_info, caplog, tmpdir + ): + swap_file = tmpdir.join("swap-file") + fname = str(swap_file) + + def subp_side_effect(cmd, *args, **kwargs): + # Mock fallocate failing, to initiate fallback + if cmd[0] == "fallocate": + raise ProcessExecutionError() + + m_subp.side_effect = subp_side_effect + # Use ext4 so both fallocate and dd are valid swap creation methods + m_get_mount_info.return_value = (mock.ANY, "ext4") + + create_swapfile(fname, "") + + cmds = [args[0][0] for args, _kwargs in m_subp.call_args_list] + assert "fallocate" in cmds, "fallocate was not called" + assert "dd" in cmds, "fallocate failure did not fallback to dd" + + assert cmds.index("dd") > cmds.index( + "fallocate" + ), "dd ran before fallocate" + + assert mock.call(["mkswap", fname]) in m_subp.call_args_list + + msg = "fallocate swap creation failed, will attempt with dd" + assert msg in caplog.text diff --git a/cloudinit/config/tests/test_ssh.py b/cloudinit/config/tests/test_ssh.py index 0c554414..87ccdb60 100644 --- a/cloudinit/config/tests/test_ssh.py +++ b/cloudinit/config/tests/test_ssh.py @@ -10,6 +10,8 @@ import logging LOG = logging.getLogger(__name__) MODPATH = "cloudinit.config.cc_ssh." 
+KEY_NAMES_NO_DSA = [name for name in cc_ssh.GENERATE_KEY_NAMES + if name not in 'dsa'] @mock.patch(MODPATH + "ssh_util.setup_user_keys") @@ -25,7 +27,7 @@ class TestHandleSsh(CiTestCase): } self.test_hostkey_files = [] hostkey_tmpdir = self.tmp_dir() - for key_type in ['dsa', 'ecdsa', 'ed25519', 'rsa']: + for key_type in cc_ssh.GENERATE_KEY_NAMES: key_data = self.test_hostkeys[key_type] filename = 'ssh_host_%s_key.pub' % key_type filepath = os.path.join(hostkey_tmpdir, filename) @@ -223,7 +225,7 @@ class TestHandleSsh(CiTestCase): cfg = {} expected_call = [self.test_hostkeys[key_type] for key_type - in ['ecdsa', 'ed25519', 'rsa']] + in KEY_NAMES_NO_DSA] cc_ssh.handle("name", cfg, cloud, LOG, None) self.assertEqual([mock.call(expected_call)], cloud.datasource.publish_host_keys.call_args_list) @@ -252,7 +254,7 @@ class TestHandleSsh(CiTestCase): cfg = {'ssh_publish_hostkeys': {'enabled': True}} expected_call = [self.test_hostkeys[key_type] for key_type - in ['ecdsa', 'ed25519', 'rsa']] + in KEY_NAMES_NO_DSA] cc_ssh.handle("name", cfg, cloud, LOG, None) self.assertEqual([mock.call(expected_call)], cloud.datasource.publish_host_keys.call_args_list) @@ -339,7 +341,65 @@ class TestHandleSsh(CiTestCase): cfg = {'ssh_publish_hostkeys': {'enabled': True, 'blacklist': []}} expected_call = [self.test_hostkeys[key_type] for key_type - in ['dsa', 'ecdsa', 'ed25519', 'rsa']] + in cc_ssh.GENERATE_KEY_NAMES] cc_ssh.handle("name", cfg, cloud, LOG, None) self.assertEqual([mock.call(expected_call)], cloud.datasource.publish_host_keys.call_args_list) + + @mock.patch(MODPATH + "ug_util.normalize_users_groups") + @mock.patch(MODPATH + "util.write_file") + def test_handle_ssh_keys_in_cfg(self, m_write_file, m_nug, m_setup_keys): + """Test handle with ssh keys and certificate.""" + # Populate a config dictionary to pass to handle() as well + # as the expected file-writing calls. + cfg = {"ssh_keys": {}} + + expected_calls = [] + for key_type in cc_ssh.GENERATE_KEY_NAMES: + private_name = "{}_private".format(key_type) + public_name = "{}_public".format(key_type) + cert_name = "{}_certificate".format(key_type) + + # Actual key contents don"t have to be realistic + private_value = "{}_PRIVATE_KEY".format(key_type) + public_value = "{}_PUBLIC_KEY".format(key_type) + cert_value = "{}_CERT_KEY".format(key_type) + + cfg["ssh_keys"][private_name] = private_value + cfg["ssh_keys"][public_name] = public_value + cfg["ssh_keys"][cert_name] = cert_value + + expected_calls.extend([ + mock.call( + '/etc/ssh/ssh_host_{}_key'.format(key_type), + private_value, + 384 + ), + mock.call( + '/etc/ssh/ssh_host_{}_key.pub'.format(key_type), + public_value, + 384 + ), + mock.call( + '/etc/ssh/ssh_host_{}_key-cert.pub'.format(key_type), + cert_value, + 384 + ), + mock.call( + '/etc/ssh/sshd_config', + ('HostCertificate /etc/ssh/ssh_host_{}_key-cert.pub' + '\n'.format(key_type)), + preserve_mode=True + ) + ]) + + # Run the handler. + m_nug.return_value = ([], {}) + with mock.patch(MODPATH + 'ssh_util.parse_ssh_config', + return_value=[]): + cc_ssh.handle("name", cfg, self.tmp_cloud(distro='ubuntu'), + LOG, None) + + # Check that all expected output has been done. 
+ for call_ in expected_calls: + self.assertIn(call_, m_write_file.call_args_list) diff --git a/cloudinit/distros/__init__.py b/cloudinit/distros/__init__.py index 2537608f..1e118472 100755 --- a/cloudinit/distros/__init__.py +++ b/cloudinit/distros/__init__.py @@ -23,6 +23,7 @@ from cloudinit import net from cloudinit.net import eni from cloudinit.net import network_state from cloudinit.net import renderers +from cloudinit import persistence from cloudinit import ssh_util from cloudinit import type_utils from cloudinit import subp @@ -62,7 +63,7 @@ PREFERRED_NTP_CLIENTS = ['chrony', 'systemd-timesyncd', 'ntp', 'ntpdate'] LDH_ASCII_CHARS = string.ascii_letters + string.digits + "-" -class Distro(metaclass=abc.ABCMeta): +class Distro(persistence.CloudInitPickleMixin, metaclass=abc.ABCMeta): usr_lib_exec = "/usr/lib" hosts_fn = "/etc/hosts" @@ -73,6 +74,11 @@ class Distro(metaclass=abc.ABCMeta): renderer_configs = {} _preferred_ntp_clients = None networking_cls = LinuxNetworking + # This is used by self.shutdown_command(), and can be overridden in + # subclasses + shutdown_options_map = {'halt': '-H', 'poweroff': '-P', 'reboot': '-r'} + + _ci_pkl_version = 1 def __init__(self, name, cfg, paths): self._paths = paths @@ -80,6 +86,18 @@ class Distro(metaclass=abc.ABCMeta): self.name = name self.networking = self.networking_cls() + def _unpickle(self, ci_pkl_version: int) -> None: + """Perform deserialization fixes for Distro.""" + if "networking" not in self.__dict__ or not self.networking.__dict__: + # This is either a Distro pickle with no networking attribute OR + # this is a Distro pickle with a networking attribute but from + # before ``Networking`` had any state (meaning that + # Networking.__setstate__ will not be called). In either case, we + # want to ensure that `self.networking` is freshly-instantiated: + # either because it isn't present at all, or because it will be + # missing expected instance state otherwise. + self.networking = self.networking_cls() + @abc.abstractmethod def install_packages(self, pkglist): raise NotImplementedError() @@ -250,8 +268,9 @@ class Distro(metaclass=abc.ABCMeta): distros = [] for family in family_list: if family not in OSFAMILIES: - raise ValueError("No distibutions found for osfamily %s" - % (family)) + raise ValueError( + "No distributions found for osfamily {}".format(family) + ) distros.extend(OSFAMILIES[family]) return distros @@ -749,6 +768,22 @@ class Distro(metaclass=abc.ABCMeta): subp.subp(['usermod', '-a', '-G', name, member]) LOG.info("Added user '%s' to group '%s'", member, name) + def shutdown_command(self, *, mode, delay, message): + # called from cc_power_state_change.load_power_state + command = ["shutdown", self.shutdown_options_map[mode]] + try: + if delay != "now": + delay = "+%d" % int(delay) + except ValueError as e: + raise TypeError( + "power_state[delay] must be 'now' or '+m' (minutes)." + " found '%s'." 
% (delay,) + ) from e + args = command + [delay] + if message: + args.append(message) + return args + def _apply_hostname_transformations_to_url(url: str, transformations: list): """ diff --git a/cloudinit/distros/alpine.py b/cloudinit/distros/alpine.py index e42443fc..ca5bfe80 100644 --- a/cloudinit/distros/alpine.py +++ b/cloudinit/distros/alpine.py @@ -8,7 +8,6 @@ from cloudinit import distros from cloudinit import helpers -from cloudinit import log as logging from cloudinit import subp from cloudinit import util @@ -16,8 +15,6 @@ from cloudinit.distros.parsers.hostname import HostnameConf from cloudinit.settings import PER_INSTANCE -LOG = logging.getLogger(__name__) - NETWORK_FILE_HEADER = """\ # This file is generated from information provided by the datasource. Changes # to it will not persist across an instance reboot. To disable cloud-init's @@ -162,4 +159,30 @@ class Distro(distros.Distro): return self._preferred_ntp_clients + def shutdown_command(self, mode='poweroff', delay='now', message=None): + # called from cc_power_state_change.load_power_state + # Alpine has halt/poweroff/reboot, with the following specifics: + # - we use them rather than the generic "shutdown" + # - delay is given with "-d [integer]" + # - the integer is in seconds, cannot be "now", and takes no "+" + # - no message is supported (argument ignored, here) + + command = [mode, "-d"] + + # Convert delay from minutes to seconds, as Alpine's + # halt/poweroff/reboot commands take seconds rather than minutes. + if delay == "now": + # Alpine's commands do not understand "now". + command += ['0'] + else: + try: + command.append(str(int(delay) * 60)) + except ValueError as e: + raise TypeError( + "power_state[delay] must be 'now' or '+m' (minutes)." + " found '%s'." % (delay,) + ) from e + + return command + # vi: ts=4 expandtab diff --git a/cloudinit/distros/amazon.py b/cloudinit/distros/amazon.py index ff9a549f..5fcec952 100644 --- a/cloudinit/distros/amazon.py +++ b/cloudinit/distros/amazon.py @@ -12,10 +12,6 @@ from cloudinit.distros import rhel -from cloudinit import log as logging - -LOG = logging.getLogger(__name__) - class Distro(rhel.Distro): diff --git a/cloudinit/distros/bsd.py b/cloudinit/distros/bsd.py index 2ed7a7d5..f717a667 100644 --- a/cloudinit/distros/bsd.py +++ b/cloudinit/distros/bsd.py @@ -17,6 +17,10 @@ class BSD(distros.Distro): hostname_conf_fn = '/etc/rc.conf' rc_conf_fn = "/etc/rc.conf" + # This differs from the parent Distro class, which has -P for + # poweroff. + shutdown_options_map = {'halt': '-H', 'poweroff': '-p', 'reboot': '-r'} + # Set in BSD distro subclasses group_add_cmd_prefix = [] pkg_cmd_install_prefix = [] diff --git a/cloudinit/distros/centos.py b/cloudinit/distros/centos.py index 4b803d2e..edb3165d 100644 --- a/cloudinit/distros/centos.py +++ b/cloudinit/distros/centos.py @@ -1,9 +1,6 @@ # This file is part of cloud-init. See LICENSE file for license information. 
from cloudinit.distros import rhel -from cloudinit import log as logging - -LOG = logging.getLogger(__name__) class Distro(rhel.Distro): diff --git a/cloudinit/distros/fedora.py b/cloudinit/distros/fedora.py index a9490d0e..0fe1fbca 100644 --- a/cloudinit/distros/fedora.py +++ b/cloudinit/distros/fedora.py @@ -10,10 +10,6 @@ from cloudinit.distros import rhel -from cloudinit import log as logging - -LOG = logging.getLogger(__name__) - class Distro(rhel.Distro): pass diff --git a/cloudinit/distros/gentoo.py b/cloudinit/distros/gentoo.py index 2bee1c89..e9b82602 100644 --- a/cloudinit/distros/gentoo.py +++ b/cloudinit/distros/gentoo.py @@ -160,10 +160,12 @@ class Distro(distros.Distro): pass if not conf: conf = HostnameConf('') - conf.set_hostname(your_hostname) - gentoo_hostname_config = 'hostname="%s"' % conf - gentoo_hostname_config = gentoo_hostname_config.replace('\n', '') - util.write_file(out_fn, gentoo_hostname_config, 0o644) + + # Many distro's format is the hostname by itself, and that is the + # way HostnameConf works but gentoo expects it to be in + # hostname="the-actual-hostname" + conf.set_hostname('hostname="%s"' % your_hostname) + util.write_file(out_fn, str(conf), 0o644) def _read_system_hostname(self): sys_hostname = self._read_hostname(self.hostname_conf_fn) diff --git a/cloudinit/distros/networking.py b/cloudinit/distros/networking.py index 10ed249d..c291196a 100644 --- a/cloudinit/distros/networking.py +++ b/cloudinit/distros/networking.py @@ -2,6 +2,7 @@ import abc import logging import os +from cloudinit import subp from cloudinit import net, util @@ -22,6 +23,9 @@ class Networking(metaclass=abc.ABCMeta): Hierarchy" in HACKING.rst for full details. """ + def __init__(self): + self.blacklist_drivers = None + def _get_current_rename_info(self) -> dict: return net._get_current_rename_info() @@ -68,7 +72,8 @@ class Networking(metaclass=abc.ABCMeta): return net.get_interfaces() def get_interfaces_by_mac(self) -> dict: - return net.get_interfaces_by_mac() + return net.get_interfaces_by_mac( + blacklist_drivers=self.blacklist_drivers) def get_master(self, devname: DeviceName): return net.get_master(devname) @@ -171,6 +176,10 @@ class Networking(metaclass=abc.ABCMeta): if strict: raise RuntimeError(msg) + @abc.abstractmethod + def try_set_link_up(self, devname: DeviceName) -> bool: + """Try setting the link to up explicitly and return if it is up.""" + class BSDNetworking(Networking): """Implementation of networking functionality shared across BSDs.""" @@ -181,6 +190,9 @@ class BSDNetworking(Networking): def settle(self, *, exists=None) -> None: """BSD has no equivalent to `udevadm settle`; noop.""" + def try_set_link_up(self, devname: DeviceName) -> bool: + raise NotImplementedError() + class LinuxNetworking(Networking): """Implementation of networking functionality common to Linux distros.""" @@ -210,3 +222,10 @@ class LinuxNetworking(Networking): if exists is not None: exists = net.sys_dev_path(exists) util.udevadm_settle(exists=exists) + + def try_set_link_up(self, devname: DeviceName) -> bool: + """Try setting the link to up explicitly and return if it is up. + Not guaranteed to bring the interface up. 
The caller is expected to + add wait times before retrying.""" + subp.subp(['ip', 'link', 'set', devname, 'up']) + return self.is_up(devname) diff --git a/cloudinit/distros/opensuse.py b/cloudinit/distros/opensuse.py index b8e557b8..7ca0ef99 100644 --- a/cloudinit/distros/opensuse.py +++ b/cloudinit/distros/opensuse.py @@ -13,15 +13,12 @@ from cloudinit import distros from cloudinit.distros.parsers.hostname import HostnameConf from cloudinit import helpers -from cloudinit import log as logging from cloudinit import subp from cloudinit import util from cloudinit.distros import rhel_util as rhutil from cloudinit.settings import PER_INSTANCE -LOG = logging.getLogger(__name__) - class Distro(distros.Distro): clock_conf_fn = '/etc/sysconfig/clock' diff --git a/cloudinit/distros/rhel_util.py b/cloudinit/distros/rhel_util.py index 387a851f..d71394b4 100644 --- a/cloudinit/distros/rhel_util.py +++ b/cloudinit/distros/rhel_util.py @@ -8,7 +8,6 @@ # # This file is part of cloud-init. See LICENSE file for license information. -from cloudinit.distros.parsers.resolv_conf import ResolvConf from cloudinit.distros.parsers.sys_conf import SysConf from cloudinit import log as logging @@ -50,29 +49,4 @@ def read_sysconfig_file(fn): contents = [] return (exists, SysConf(contents)) - -# Helper function to update RHEL/SUSE /etc/resolv.conf -def update_resolve_conf_file(fn, dns_servers, search_servers): - try: - r_conf = ResolvConf(util.load_file(fn)) - r_conf.parse() - except IOError: - util.logexc(LOG, "Failed at parsing %s reverting to an empty " - "instance", fn) - r_conf = ResolvConf('') - r_conf.parse() - if dns_servers: - for s in dns_servers: - try: - r_conf.add_nameserver(s) - except ValueError: - util.logexc(LOG, "Failed at adding nameserver %s", s) - if search_servers: - for s in search_servers: - try: - r_conf.add_search_domain(s) - except ValueError: - util.logexc(LOG, "Failed at adding search domain %s", s) - util.write_file(fn, str(r_conf), 0o644) - # vi: ts=4 expandtab diff --git a/cloudinit/distros/sles.py b/cloudinit/distros/sles.py index 6e336cbf..f3bfb9c2 100644 --- a/cloudinit/distros/sles.py +++ b/cloudinit/distros/sles.py @@ -6,10 +6,6 @@ from cloudinit.distros import opensuse -from cloudinit import log as logging - -LOG = logging.getLogger(__name__) - class Distro(opensuse.Distro): pass diff --git a/cloudinit/distros/tests/test_networking.py b/cloudinit/distros/tests/test_networking.py index b9a63842..ec508f4d 100644 --- a/cloudinit/distros/tests/test_networking.py +++ b/cloudinit/distros/tests/test_networking.py @@ -30,6 +30,9 @@ def generic_networking_cls(): def settle(self, *args, **kwargs): raise NotImplementedError + def try_set_link_up(self, *args, **kwargs): + raise NotImplementedError + error = AssertionError("Unexpectedly used /sys in generic networking code") with mock.patch( "cloudinit.net.get_sys_class_path", side_effect=error, @@ -74,6 +77,34 @@ class TestLinuxNetworkingIsPhysical: assert LinuxNetworking().is_physical(devname) +class TestBSDNetworkingTrySetLinkUp: + def test_raises_notimplementederror(self): + with pytest.raises(NotImplementedError): + BSDNetworking().try_set_link_up("eth0") + + +@mock.patch("cloudinit.net.is_up") +@mock.patch("cloudinit.distros.networking.subp.subp") +class TestLinuxNetworkingTrySetLinkUp: + def test_calls_subp_return_true(self, m_subp, m_is_up): + devname = "eth0" + m_is_up.return_value = True + is_success = LinuxNetworking().try_set_link_up(devname) + + assert (mock.call(['ip', 'link', 'set', devname, 'up']) == + m_subp.call_args_list[-1]) 
+ assert is_success + + def test_calls_subp_return_false(self, m_subp, m_is_up): + devname = "eth0" + m_is_up.return_value = False + is_success = LinuxNetworking().try_set_link_up(devname) + + assert (mock.call(['ip', 'link', 'set', devname, 'up']) == + m_subp.call_args_list[-1]) + assert not is_success + + class TestBSDNetworkingSettle: def test_settle_doesnt_error(self): # This also implicitly tests that it doesn't use subp.subp diff --git a/cloudinit/distros/ubuntu.py b/cloudinit/distros/ubuntu.py index b4c4b0c3..2a1f93d9 100644 --- a/cloudinit/distros/ubuntu.py +++ b/cloudinit/distros/ubuntu.py @@ -11,13 +11,10 @@ from cloudinit.distros import debian from cloudinit.distros import PREFERRED_NTP_CLIENTS -from cloudinit import log as logging from cloudinit import util import copy -LOG = logging.getLogger(__name__) - class Distro(debian.Distro): diff --git a/cloudinit/dmi.py b/cloudinit/dmi.py new file mode 100644 index 00000000..f0e69a5a --- /dev/null +++ b/cloudinit/dmi.py @@ -0,0 +1,163 @@ +# This file is part of cloud-init. See LICENSE file for license information. +from cloudinit import log as logging +from cloudinit import subp +from cloudinit.util import is_container, is_FreeBSD + +from collections import namedtuple +import os + +LOG = logging.getLogger(__name__) + +# Path for DMI Data +DMI_SYS_PATH = "/sys/class/dmi/id" + +kdmi = namedtuple('KernelNames', ['linux', 'freebsd']) +kdmi.__new__.defaults__ = (None, None) + +# FreeBSD's kenv(1) and Linux /sys/class/dmi/id/* both use different names from +# dmidecode. The values are the same, and ultimately what we're interested in. +# These tools offer a "cheaper" way to access those values over dmidecode. +# This is our canonical translation table. If we add more tools on other +# platforms to find dmidecode's values, their keys need to be put in here. 
+DMIDECODE_TO_KERNEL = { + 'baseboard-asset-tag': kdmi('board_asset_tag', 'smbios.planar.tag'), + 'baseboard-manufacturer': kdmi('board_vendor', 'smbios.planar.maker'), + 'baseboard-product-name': kdmi('board_name', 'smbios.planar.product'), + 'baseboard-serial-number': kdmi('board_serial', 'smbios.planar.serial'), + 'baseboard-version': kdmi('board_version', 'smbios.planar.version'), + 'bios-release-date': kdmi('bios_date', 'smbios.bios.reldate'), + 'bios-vendor': kdmi('bios_vendor', 'smbios.bios.vendor'), + 'bios-version': kdmi('bios_version', 'smbios.bios.version'), + 'chassis-asset-tag': kdmi('chassis_asset_tag', 'smbios.chassis.tag'), + 'chassis-manufacturer': kdmi('chassis_vendor', 'smbios.chassis.maker'), + 'chassis-serial-number': kdmi('chassis_serial', 'smbios.chassis.serial'), + 'chassis-version': kdmi('chassis_version', 'smbios.chassis.version'), + 'system-manufacturer': kdmi('sys_vendor', 'smbios.system.maker'), + 'system-product-name': kdmi('product_name', 'smbios.system.product'), + 'system-serial-number': kdmi('product_serial', 'smbios.system.serial'), + 'system-uuid': kdmi('product_uuid', 'smbios.system.uuid'), + 'system-version': kdmi('product_version', 'smbios.system.version'), +} + + +def _read_dmi_syspath(key): + """ + Reads dmi data from /sys/class/dmi/id + """ + kmap = DMIDECODE_TO_KERNEL.get(key) + if kmap is None or kmap.linux is None: + return None + dmi_key_path = "{0}/{1}".format(DMI_SYS_PATH, kmap.linux) + LOG.debug("querying dmi data %s", dmi_key_path) + if not os.path.exists(dmi_key_path): + LOG.debug("did not find %s", dmi_key_path) + return None + + try: + with open(dmi_key_path, "rb") as fp: + key_data = fp.read() + except PermissionError: + LOG.debug("Could not read %s", dmi_key_path) + return None + + # uninitialized dmi values show as all \xff and /sys appends a '\n'. + # in that event, return empty string. + if key_data == b'\xff' * (len(key_data) - 1) + b'\n': + key_data = b"" + + try: + return key_data.decode('utf8').strip() + except UnicodeDecodeError as e: + LOG.error("utf-8 decode of content (%s) in %s failed: %s", + dmi_key_path, key_data, e) + + return None + + +def _read_kenv(key): + """ + Reads dmi data from FreeBSD's kenv(1) + """ + kmap = DMIDECODE_TO_KERNEL.get(key) + if kmap is None or kmap.freebsd is None: + return None + + LOG.debug("querying dmi data %s", kmap.freebsd) + + try: + cmd = ["kenv", "-q", kmap.freebsd] + (result, _err) = subp.subp(cmd) + result = result.strip() + LOG.debug("kenv returned '%s' for '%s'", result, kmap.freebsd) + return result + except subp.ProcessExecutionError as e: + LOG.debug('failed kenv cmd: %s\n%s', cmd, e) + return None + + return None + + +def _call_dmidecode(key, dmidecode_path): + """ + Calls out to dmidecode to get the data out. This is mostly for supporting + OS's without /sys/class/dmi/id support. + """ + try: + cmd = [dmidecode_path, "--string", key] + (result, _err) = subp.subp(cmd) + result = result.strip() + LOG.debug("dmidecode returned '%s' for '%s'", result, key) + if result.replace(".", "") == "": + return "" + return result + except subp.ProcessExecutionError as e: + LOG.debug('failed dmidecode cmd: %s\n%s', cmd, e) + return None + + +def read_dmi_data(key): + """ + Wrapper for reading DMI data. + + If running in a container return None. This is because DMI data is + assumed to be not useful in a container as it does not represent the + container but rather the host. 
+ + This will do the following (returning the first that produces a + result): + 1) Use a mapping to translate `key` from dmidecode naming to + sysfs naming and look in /sys/class/dmi/... for a value. + 2) Use `key` as a sysfs key directly and look in /sys/class/dmi/... + 3) Fall-back to passing `key` to `dmidecode --string`. + + If all of the above fail to find a value, None will be returned. + """ + + if is_container(): + return None + + if is_FreeBSD(): + return _read_kenv(key) + + syspath_value = _read_dmi_syspath(key) + if syspath_value is not None: + return syspath_value + + def is_x86(arch): + return (arch == 'x86_64' or (arch[0] == 'i' and arch[2:] == '86')) + + # running dmidecode can be problematic on some arches (LP: #1243287) + uname_arch = os.uname()[4] + if not (is_x86(uname_arch) or uname_arch in ('aarch64', 'amd64')): + LOG.debug("dmidata is not supported on %s", uname_arch) + return None + + dmidecode_path = subp.which('dmidecode') + if dmidecode_path: + return _call_dmidecode(key, dmidecode_path) + + LOG.warning("did not find either path %s or dmidecode command", + DMI_SYS_PATH) + return None + +# vi: ts=4 expandtab diff --git a/cloudinit/features.py b/cloudinit/features.py index c44fa29e..e1116a17 100644 --- a/cloudinit/features.py +++ b/cloudinit/features.py @@ -21,20 +21,32 @@ all valid states of a flag, not just the default state. ERROR_ON_USER_DATA_FAILURE = True """ If there is a failure in obtaining user data (i.e., #include or -decompress fails), old behavior is to log a warning and proceed. -After the 20.2 release, we instead raise an exception. -This flag can be removed after Focal is no longer supported +decompress fails) and ``ERROR_ON_USER_DATA_FAILURE`` is ``False``, +cloud-init will log a warning and proceed. If it is ``True``, +cloud-init will instead raise an exception. + +As of 20.3, ``ERROR_ON_USER_DATA_FAILURE`` is ``True``. + +(This flag can be removed after Focal is no longer supported.) """ ALLOW_EC2_MIRRORS_ON_NON_AWS_INSTANCE_TYPES = False """ -When configuring apt mirrors, old behavior is to allow -the use of ec2 mirrors if the datasource availability_zone format -matches one of the possible aws ec2 regions. After the 20.2 release, we -no longer publish ec2 region mirror urls on non-AWS cloud platforms. -Besides feature_overrides.py, users can override this by providing -#cloud-config apt directives. +When configuring apt mirrors, if +``ALLOW_EC2_MIRRORS_ON_NON_AWS_INSTANCE_TYPES`` is ``True`` cloud-init +will detect that a datasource's ``availability_zone`` property looks +like an EC2 availability zone and set the ``ec2_region`` variable when +generating mirror URLs; this can lead to incorrect mirrors being +configured in clouds whose AZs follow EC2's naming pattern. + +As of 20.3, ``ALLOW_EC2_MIRRORS_ON_NON_AWS_INSTANCE_TYPES`` is ``False`` +so we no longer include ``ec2_region`` in mirror determination on +non-AWS cloud platforms. + +If the old behavior is desired, users can provide the appropriate +mirrors via :py:mod:`apt: <cloudinit.config.cc_apt_configure>` +directives in cloud-config. """ try: diff --git a/cloudinit/gpg.py b/cloudinit/gpg.py index be0ca0ea..3780326c 100644 --- a/cloudinit/gpg.py +++ b/cloudinit/gpg.py @@ -42,7 +42,7 @@ def recv_key(key, keyserver, retries=(1, 1)): @param retries: an iterable of sleep lengths for retries. 
Use None to indicate no retries.""" LOG.debug("Importing key '%s' from keyserver '%s'", key, keyserver) - cmd = ["gpg", "--keyserver=%s" % keyserver, "--recv-keys", key] + cmd = ["gpg", "--no-tty", "--keyserver=%s" % keyserver, "--recv-keys", key] if retries is None: retries = [] trynum = 0 diff --git a/cloudinit/handlers/shell_script.py b/cloudinit/handlers/shell_script.py index 214714bc..9917f551 100644 --- a/cloudinit/handlers/shell_script.py +++ b/cloudinit/handlers/shell_script.py @@ -11,13 +11,10 @@ import os from cloudinit import handlers -from cloudinit import log as logging from cloudinit import util from cloudinit.settings import (PER_ALWAYS) -LOG = logging.getLogger(__name__) - class ShellScriptPartHandler(handlers.Handler): diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py index 668e3cd6..7fa493a6 100644 --- a/cloudinit/mergers/__init__.py +++ b/cloudinit/mergers/__init__.py @@ -7,12 +7,10 @@ import re from cloudinit import importer -from cloudinit import log as logging from cloudinit import type_utils NAME_MTCH = re.compile(r"(^[a-zA-Z_][A-Za-z0-9_]*)\((.*?)\)$") -LOG = logging.getLogger(__name__) DEF_MERGE_TYPE = "list()+dict()+str()" MERGER_PREFIX = 'm_' MERGER_ATTR = 'Merger' diff --git a/cloudinit/net/__init__.py b/cloudinit/net/__init__.py index e233149a..de65e7af 100644 --- a/cloudinit/net/__init__.py +++ b/cloudinit/net/__init__.py @@ -124,6 +124,15 @@ def master_is_bridge_or_bond(devname): return (os.path.exists(bonding_path) or os.path.exists(bridge_path)) +def master_is_openvswitch(devname): + """Return a bool indicating if devname's master is openvswitch""" + master_path = get_master(devname) + if master_path is None: + return False + ovs_path = sys_dev_path(devname, path="upper_ovs-system") + return os.path.exists(ovs_path) + + def is_netfailover(devname, driver=None): """ netfailover driver uses 3 nics, master, primary and standby. 
    this returns True if the device is either the primary or standby
@@ -746,18 +755,22 @@ def get_ib_interface_hwaddr(ifname, ethernet_format):
     return mac
 
 
-def get_interfaces_by_mac():
+def get_interfaces_by_mac(blacklist_drivers=None) -> dict:
     if util.is_FreeBSD():
-        return get_interfaces_by_mac_on_freebsd()
+        return get_interfaces_by_mac_on_freebsd(
+            blacklist_drivers=blacklist_drivers)
     elif util.is_NetBSD():
-        return get_interfaces_by_mac_on_netbsd()
+        return get_interfaces_by_mac_on_netbsd(
+            blacklist_drivers=blacklist_drivers)
     elif util.is_OpenBSD():
-        return get_interfaces_by_mac_on_openbsd()
+        return get_interfaces_by_mac_on_openbsd(
+            blacklist_drivers=blacklist_drivers)
     else:
-        return get_interfaces_by_mac_on_linux()
+        return get_interfaces_by_mac_on_linux(
+            blacklist_drivers=blacklist_drivers)
 
 
-def get_interfaces_by_mac_on_freebsd():
+def get_interfaces_by_mac_on_freebsd(blacklist_drivers=None) -> dict:
     (out, _) = subp.subp(['ifconfig', '-a', 'ether'])
 
     # flatten each interface block in a single line
@@ -784,7 +797,7 @@ def get_interfaces_by_mac_on_freebsd():
     return results
 
 
-def get_interfaces_by_mac_on_netbsd():
+def get_interfaces_by_mac_on_netbsd(blacklist_drivers=None) -> dict:
     ret = {}
     re_field_match = (
         r"(?P<ifname>\w+).*address:\s"
@@ -800,7 +813,7 @@ def get_interfaces_by_mac_on_netbsd():
     return ret
 
 
-def get_interfaces_by_mac_on_openbsd():
+def get_interfaces_by_mac_on_openbsd(blacklist_drivers=None) -> dict:
     ret = {}
     re_field_match = (
         r"(?P<ifname>\w+).*lladdr\s"
@@ -815,12 +828,13 @@ def get_interfaces_by_mac_on_openbsd():
     return ret
 
 
-def get_interfaces_by_mac_on_linux():
+def get_interfaces_by_mac_on_linux(blacklist_drivers=None) -> dict:
     """Build a dictionary of tuples {mac: name}.
 
     Bridges and any devices that have a 'stolen' mac are excluded."""
     ret = {}
-    for name, mac, _driver, _devid in get_interfaces():
+    for name, mac, _driver, _devid in get_interfaces(
+            blacklist_drivers=blacklist_drivers):
         if mac in ret:
             raise RuntimeError(
                 "duplicate mac found! both '%s' and '%s' have mac '%s'" %
@@ -838,11 +852,13 @@ def get_interfaces_by_mac_on_linux():
     return ret
 
 
-def get_interfaces():
+def get_interfaces(blacklist_drivers=None) -> list:
     """Return list of interface tuples (name, mac, driver, device_id)
 
     Bridges and any devices that have a 'stolen' mac are excluded."""
     ret = []
+    if blacklist_drivers is None:
+        blacklist_drivers = []
     devs = get_devicelist()
     # 16 somewhat arbitrarily chosen. Normally a mac is 6 '00:' tokens.
     zero_mac = ':'.join(('00',) * 16)
@@ -855,8 +871,10 @@ def get_interfaces():
             continue
         if is_bond(name):
             continue
-        if get_master(name) is not None and not master_is_bridge_or_bond(name):
-            continue
+        if get_master(name) is not None:
+            if (not master_is_bridge_or_bond(name) and
+                    not master_is_openvswitch(name)):
+                continue
         if is_netfailover(name):
             continue
         mac = get_interface_mac(name)
@@ -866,7 +884,11 @@ def get_interfaces():
         # skip nics that have no mac (00:00....)
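The new ``blacklist_drivers`` parameter threads from ``get_interfaces_by_mac`` down into ``get_interfaces``, where the filtering happens a few lines below. A minimal usage sketch, assuming a Linux system; the driver list shown is the one the Azure datasource passes as its ``BLACKLIST_DRIVERS``:

    from cloudinit import net

    # Enumerate interfaces by MAC while excluding platform-managed
    # virtual-function drivers.
    macs = net.get_interfaces_by_mac(
        blacklist_drivers=['mlx4_core', 'mlx5_core'])
    for mac, name in sorted(macs.items()):
        print(mac, name)  # e.g. '00:0d:3a:04:75:98 eth0'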
if name != 'lo' and mac == zero_mac[:len(mac)]: continue - ret.append((name, mac, device_driver(name), device_devid(name))) + # skip nics that have drivers blacklisted + driver = device_driver(name) + if driver in blacklist_drivers: + continue + ret.append((name, mac, driver, device_devid(name))) return ret diff --git a/cloudinit/net/eni.py b/cloudinit/net/eni.py index 13c041f3..0074691b 100644 --- a/cloudinit/net/eni.py +++ b/cloudinit/net/eni.py @@ -401,6 +401,10 @@ class Renderer(renderer.Renderer): sections = [] subnets = iface.get('subnets', {}) accept_ra = iface.pop('accept-ra', None) + ethernet_wol = iface.pop('wakeonlan', None) + if ethernet_wol: + # Specify WOL setting 'g' for using "Magic Packet" + iface['ethernet-wol'] = 'g' if subnets: for index, subnet in enumerate(subnets): ipv4_subnet_mtu = None diff --git a/cloudinit/net/network_state.py b/cloudinit/net/network_state.py index b2f7d31e..e8bf9e39 100644 --- a/cloudinit/net/network_state.py +++ b/cloudinit/net/network_state.py @@ -369,6 +369,9 @@ class NetworkStateInterpreter(metaclass=CommandHandlerMeta): accept_ra = command.get('accept-ra', None) if accept_ra is not None: accept_ra = util.is_true(accept_ra) + wakeonlan = command.get('wakeonlan', None) + if wakeonlan is not None: + wakeonlan = util.is_true(wakeonlan) iface.update({ 'name': command.get('name'), 'type': command.get('type'), @@ -379,7 +382,8 @@ class NetworkStateInterpreter(metaclass=CommandHandlerMeta): 'address': None, 'gateway': None, 'subnets': subnets, - 'accept-ra': accept_ra + 'accept-ra': accept_ra, + 'wakeonlan': wakeonlan, }) self._network_state['interfaces'].update({command.get('name'): iface}) self.dump_network_state() @@ -820,7 +824,8 @@ def _normalize_subnet(subnet): if subnet.get('type') in ('static', 'static6'): normal_subnet.update( - _normalize_net_keys(normal_subnet, address_keys=('address',))) + _normalize_net_keys(normal_subnet, address_keys=( + 'address', 'ip_address',))) normal_subnet['routes'] = [_normalize_route(r) for r in subnet.get('routes', [])] diff --git a/cloudinit/net/sysconfig.py b/cloudinit/net/sysconfig.py index 0a5d481d..a930e612 100644 --- a/cloudinit/net/sysconfig.py +++ b/cloudinit/net/sysconfig.py @@ -99,6 +99,10 @@ class ConfigMap(object): def __len__(self): return len(self._conf) + def skip_key_value(self, key, val): + """Skip the pair key, value if it matches a certain rule.""" + return False + def to_string(self): buf = io.StringIO() buf.write(_make_header()) @@ -106,6 +110,8 @@ class ConfigMap(object): buf.write("\n") for key in sorted(self._conf.keys()): value = self._conf[key] + if self.skip_key_value(key, value): + continue if isinstance(value, bool): value = self._bool_map[value] if not isinstance(value, str): @@ -214,6 +220,7 @@ class NetInterface(ConfigMap): 'bond': 'Bond', 'bridge': 'Bridge', 'infiniband': 'InfiniBand', + 'vlan': 'Vlan', } def __init__(self, iface_name, base_sysconf_dir, templates, @@ -267,6 +274,11 @@ class NetInterface(ConfigMap): c.routes = self.routes.copy() return c + def skip_key_value(self, key, val): + if key == 'TYPE' and val == 'Vlan': + return True + return False + class Renderer(renderer.Renderer): """Renders network information in a /etc/sysconfig format.""" @@ -355,6 +367,11 @@ class Renderer(renderer.Renderer): if new_key: iface_cfg[new_key] = old_value + # only set WakeOnLan for physical interfaces + if ('wakeonlan' in iface and iface['wakeonlan'] and + iface['type'] == 'physical'): + iface_cfg['ETHTOOL_OPTS'] = 'wol g' + @classmethod def _render_subnets(cls, iface_cfg, 
                        subnets, has_default_route, flavor):
         # setting base values
@@ -451,6 +468,10 @@ class Renderer(renderer.Renderer):
                     iface_cfg[mtu_key] = subnet['mtu']
                 else:
                     iface_cfg[mtu_key] = subnet['mtu']
+
+                if subnet_is_ipv6(subnet) and flavor == 'rhel':
+                    iface_cfg['IPV6_FORCE_ACCEPT_RA'] = False
+                    iface_cfg['IPV6_AUTOCONF'] = False
             elif subnet_type == 'manual':
                 if flavor == 'suse':
                     LOG.debug('Unknown subnet type setting "%s"', subnet_type)
@@ -697,7 +718,16 @@ class Renderer(renderer.Renderer):
             iface_cfg['ETHERDEVICE'] = iface_name[:iface_name.rfind('.')]
         else:
             iface_cfg['VLAN'] = True
-            iface_cfg['PHYSDEV'] = iface_name[:iface_name.rfind('.')]
+            iface_cfg.kind = 'vlan'
+
+            rdev = iface['vlan-raw-device']
+            supported = _supported_vlan_names(rdev, iface['vlan_id'])
+            if iface_name not in supported:
+                LOG.info(
+                    "Name '%s' for vlan '%s' is not officially supported "
+                    "by RHEL. Supported: %s",
+                    iface_name, rdev, ' '.join(supported))
+            iface_cfg['PHYSDEV'] = rdev
 
         iface_subnets = iface.get("subnets", [])
         route_cfg = iface_cfg.routes
@@ -896,6 +926,15 @@ class Renderer(renderer.Renderer):
             "\n".join(netcfg) + "\n", file_mode)
 
 
+def _supported_vlan_names(rdev, vid):
+    """Return list of supported names for vlan devices per RHEL doc
+    11.5. Naming Scheme for VLAN Interfaces."""
+    return [
+        v.format(rdev=rdev, vid=int(vid))
+        for v in ("{rdev}{vid:04}", "{rdev}{vid}",
+                  "{rdev}.{vid:04}", "{rdev}.{vid}")]
+
+
 def available(target=None):
     sysconfig = available_sysconfig(target=target)
     nm = available_nm(target=target)
diff --git a/cloudinit/net/tests/test_init.py b/cloudinit/net/tests/test_init.py
index 311ab6f8..0535387a 100644
--- a/cloudinit/net/tests/test_init.py
+++ b/cloudinit/net/tests/test_init.py
@@ -190,6 +190,28 @@ class TestReadSysNet(CiTestCase):
         self.assertTrue(net.master_is_bridge_or_bond('eth1'))
         self.assertTrue(net.master_is_bridge_or_bond('eth2'))
 
+    def test_master_is_openvswitch(self):
+        ovs_mac = 'bb:cc:aa:bb:cc:aa'
+
+        # No master => False
+        write_file(os.path.join(self.sysdir, 'eth1', 'address'), ovs_mac)
+
+        self.assertFalse(net.master_is_bridge_or_bond('eth1'))
+
+        # masters without ovs-system => False
+        write_file(os.path.join(self.sysdir, 'ovs-system', 'address'), ovs_mac)
+
+        os.symlink('../ovs-system', os.path.join(self.sysdir, 'eth1',
+                                                 'master'))
+
+        self.assertFalse(net.master_is_openvswitch('eth1'))
+
+        # masters with ovs-system => True
+        os.symlink('../ovs-system', os.path.join(self.sysdir, 'eth1',
+                                                 'upper_ovs-system'))
+
+        self.assertTrue(net.master_is_openvswitch('eth1'))
+
     def test_is_vlan(self):
         """is_vlan is True when /sys/net/devname/uevent has DEVTYPE=vlan."""
         ensure_file(os.path.join(self.sysdir, 'eth0', 'uevent'))
@@ -465,20 +487,32 @@ class TestGetInterfaceMAC(CiTestCase):
     ):
         bridge_mac = 'aa:bb:cc:aa:bb:cc'
         bond_mac = 'cc:bb:aa:cc:bb:aa'
+        ovs_mac = 'bb:cc:aa:bb:cc:aa'
+
         write_file(os.path.join(self.sysdir, 'br0', 'address'), bridge_mac)
         write_file(os.path.join(self.sysdir, 'br0', 'bridge'), '')
 
         write_file(os.path.join(self.sysdir, 'bond0', 'address'), bond_mac)
         write_file(os.path.join(self.sysdir, 'bond0', 'bonding'), '')
 
+        write_file(os.path.join(self.sysdir, 'ovs-system', 'address'),
+                   ovs_mac)
+
         write_file(os.path.join(self.sysdir, 'eth1', 'address'), bridge_mac)
         os.symlink('../br0', os.path.join(self.sysdir, 'eth1', 'master'))
 
         write_file(os.path.join(self.sysdir, 'eth2', 'address'), bond_mac)
         os.symlink('../bond0', os.path.join(self.sysdir, 'eth2', 'master'))
 
+        write_file(os.path.join(self.sysdir, 'eth3', 'address'), ovs_mac)
+        os.symlink('../ovs-system', os.path.join(self.sysdir, 'eth3',
+                                                 'master'))
+        os.symlink('../ovs-system', os.path.join(self.sysdir, 'eth3',
+                                                 'upper_ovs-system'))
+
         interface_names = [interface[0] for interface in net.get_interfaces()]
-        self.assertEqual(['eth1', 'eth2'], sorted(interface_names))
+        self.assertEqual(['eth1', 'eth2', 'eth3', 'ovs-system'],
+                         sorted(interface_names))
 
 
 class TestInterfaceHasOwnMAC(CiTestCase):
diff --git a/cloudinit/persistence.py b/cloudinit/persistence.py
new file mode 100644
index 00000000..85aa79df
--- /dev/null
+++ b/cloudinit/persistence.py
@@ -0,0 +1,67 @@
+# Copyright (C) 2020 Canonical Ltd.
+#
+# Author: Daniel Watkins <oddbloke@ubuntu.com>
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+
+class CloudInitPickleMixin:
+    """Scaffolding for versioning of pickles.
+
+    This class implements ``__getstate__`` and ``__setstate__`` to provide
+    lightweight versioning of the pickles that are generated for classes which
+    use it. Versioning is done at the class level.
+
+    The current version of a class's pickle should be set in the class
+    variable ``_ci_pkl_version``, as an int. If not overridden, it will
+    default to 0.
+
+    On unpickle, the object's state will be restored and then
+    ``self._unpickle`` is called with the version of the stored pickle as the
+    only argument: this is where classes should implement any deserialization
+    fixes they require. (If the stored pickle has no version, 0 is passed.)
+    """
+
+    _ci_pkl_version = 0
+
+    def __getstate__(self):
+        """Persist instance state, adding a pickle version attribute.
+
+        This adds a ``_ci_pkl_version`` attribute to ``self.__dict__`` and
+        returns that for serialisation. The attribute is stripped out in
+        ``__setstate__`` on unpickle.
+
+        The value of ``_ci_pkl_version`` is ``type(self)._ci_pkl_version``.
+        """
+        state = self.__dict__.copy()
+        state["_ci_pkl_version"] = type(self)._ci_pkl_version
+        return state
+
+    def __setstate__(self, state: dict) -> None:
+        """Restore instance state and handle missing attributes on upgrade.
+
+        This will be called when an instance of this class is unpickled; the
+        previous instance's ``__dict__`` is passed as ``state``. This method
+        removes the pickle version from the stored state, restores the
+        remaining state into the current instance, and then calls
+        ``self._unpickle`` with the version (or 0, if no version is found in
+        the stored state).
+
+        See https://docs.python.org/3/library/pickle.html#object.__setstate__
+        for further background.
+        """
+        version = state.pop("_ci_pkl_version", 0)
+        self.__dict__.update(state)
+        self._unpickle(version)
+
+    def _unpickle(self, ci_pkl_version: int) -> None:
+        """Perform any deserialization fixes required.
+
+        By default, this does nothing. Classes using this mixin should
+        override this method if they have fixes they need to apply.
+
+        ``ci_pkl_version`` will be the version stored in the pickle for this
+        object, or 0 if no version is present.
+        """
+
+
+# vi: ts=4 expandtab
diff --git a/cloudinit/sources/DataSourceAliYun.py b/cloudinit/sources/DataSourceAliYun.py
index 45cc9f00..09052873 100644
--- a/cloudinit/sources/DataSourceAliYun.py
+++ b/cloudinit/sources/DataSourceAliYun.py
@@ -1,8 +1,8 @@
 # This file is part of cloud-init. See LICENSE file for license information.
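The ``CloudInitPickleMixin`` scaffolding above is easiest to see with a small consumer. A minimal sketch (the class and attribute names here are hypothetical, purely for illustration):

    import pickle

    from cloudinit.persistence import CloudInitPickleMixin


    class Example(CloudInitPickleMixin):
        _ci_pkl_version = 1

        def __init__(self):
            self.introduced_in_v1 = "default"

        def _unpickle(self, ci_pkl_version: int) -> None:
            # Objects pickled before version 1 lack the attribute; backfill.
            if not hasattr(self, "introduced_in_v1"):
                self.introduced_in_v1 = "default"


    # Round-trip: __setstate__ restores the state, then calls _unpickle(1).
    restored = pickle.loads(pickle.dumps(Example()))
    assert restored.introduced_in_v1 == "default"

Because ``__getstate__`` stamps the version into the stored state, an old pickle (version 0, or no version at all) can be detected and repaired in ``_unpickle`` before the object is used.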
+from cloudinit import dmi from cloudinit import sources from cloudinit.sources import DataSourceEc2 as EC2 -from cloudinit import util ALIYUN_PRODUCT = "Alibaba Cloud ECS" @@ -30,7 +30,7 @@ class DataSourceAliYun(EC2.DataSourceEc2): def _is_aliyun(): - return util.read_dmi_data('system-product-name') == ALIYUN_PRODUCT + return dmi.read_dmi_data('system-product-name') == ALIYUN_PRODUCT def parse_public_keys(public_keys): diff --git a/cloudinit/sources/DataSourceAltCloud.py b/cloudinit/sources/DataSourceAltCloud.py index ac3ecc3d..cd93412a 100644 --- a/cloudinit/sources/DataSourceAltCloud.py +++ b/cloudinit/sources/DataSourceAltCloud.py @@ -16,6 +16,7 @@ import errno import os import os.path +from cloudinit import dmi from cloudinit import log as logging from cloudinit import sources from cloudinit import subp @@ -109,7 +110,7 @@ class DataSourceAltCloud(sources.DataSource): CLOUD_INFO_FILE) return 'UNKNOWN' return cloud_type - system_name = util.read_dmi_data("system-product-name") + system_name = dmi.read_dmi_data("system-product-name") if not system_name: return 'UNKNOWN' diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index f3c6452b..04ff2131 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -12,9 +12,12 @@ import os import os.path import re from time import time +from time import sleep from xml.dom import minidom import xml.etree.ElementTree as ET +from enum import Enum +from cloudinit import dmi from cloudinit import log as logging from cloudinit import net from cloudinit.event import EventType @@ -28,6 +31,7 @@ from cloudinit import util from cloudinit.reporting import events from cloudinit.sources.helpers.azure import ( + DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE, azure_ds_reporter, azure_ds_telemetry_reporter, get_metadata_from_fabric, @@ -37,7 +41,8 @@ from cloudinit.sources.helpers.azure import ( EphemeralDHCPv4WithReporting, is_byte_swapped, dhcp_log_cb, - push_log_to_kvp) + push_log_to_kvp, + report_failure_to_fabric) LOG = logging.getLogger(__name__) @@ -64,13 +69,27 @@ DEFAULT_FS = 'ext4' # DMI chassis-asset-tag is set static for all azure instances AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77' REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds" +REPROVISION_NIC_ATTACH_MARKER_FILE = "/var/lib/cloud/data/wait_for_nic_attach" +REPROVISION_NIC_DETACHED_MARKER_FILE = "/var/lib/cloud/data/nic_detached" REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready" AGENT_SEED_DIR = '/var/lib/waagent' + # In the event where the IMDS primary server is not # available, it takes 1s to fallback to the secondary one IMDS_TIMEOUT_IN_SECONDS = 2 IMDS_URL = "http://169.254.169.254/metadata/" +IMDS_VER = "2019-06-01" +IMDS_VER_PARAM = "api-version={}".format(IMDS_VER) + + +class metadata_type(Enum): + compute = "{}instance?{}".format(IMDS_URL, IMDS_VER_PARAM) + network = "{}instance/network?{}".format(IMDS_URL, + IMDS_VER_PARAM) + reprovisiondata = "{}reprovisiondata?{}".format(IMDS_URL, + IMDS_VER_PARAM) + PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0" @@ -83,6 +102,25 @@ UBUNTU_EXTENDED_NETWORK_SCRIPTS = [ '/run/network/interfaces.ephemeral.d', ] +# This list is used to blacklist devices that will be considered +# for renaming or fallback interfaces. +# +# On Azure network devices using these drivers are automatically +# configured by the platform and should not be configured by +# cloud-init's network configuration. 
+# +# Note: +# Azure Dv4 and Ev4 series VMs always have mlx5 hardware. +# https://docs.microsoft.com/en-us/azure/virtual-machines/dv4-dsv4-series +# https://docs.microsoft.com/en-us/azure/virtual-machines/ev4-esv4-series +# Earlier D and E series VMs (such as Dv2, Dv3, and Ev3 series VMs) +# can have either mlx4 or mlx5 hardware, with the older series VMs +# having a higher chance of coming with mlx4 hardware. +# https://docs.microsoft.com/en-us/azure/virtual-machines/dv2-dsv2-series +# https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series +# https://docs.microsoft.com/en-us/azure/virtual-machines/ev3-esv3-series +BLACKLIST_DRIVERS = ['mlx4_core', 'mlx5_core'] + def find_storvscid_from_sysctl_pnpinfo(sysctl_out, deviceid): # extract the 'X' from dev.storvsc.X. if deviceid matches @@ -280,9 +318,9 @@ def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'): try: set_hostname(temp_hostname, hostname_command) except Exception as e: - msg = 'Failed setting temporary hostname: %s' % e - report_diagnostic_event(msg) - LOG.warning(msg) + report_diagnostic_event( + 'Failed setting temporary hostname: %s' % e, + logger_func=LOG.warning) yield None return try: @@ -337,7 +375,9 @@ class DataSourceAzure(sources.DataSource): cfg=cfg, prev_hostname=previous_hn) except Exception as e: - LOG.warning("Failed publishing hostname: %s", e) + report_diagnostic_event( + "Failed publishing hostname: %s" % e, + logger_func=LOG.warning) util.logexc(LOG, "handling set_hostname failed") return False @@ -410,20 +450,39 @@ class DataSourceAzure(sources.DataSource): # need to look in the datadir and consider that valid ddir = self.ds_cfg['data_dir'] + # The order in which the candidates are inserted matters here, because + # it determines the value of ret. More specifically, the first one in + # the candidate list determines the path to take in order to get the + # metadata we need. 
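The ordering is easiest to see in isolation. A simplified, standalone model follows (a hypothetical helper; in the real code the two boolean flags are ``os.path.isfile()`` checks against the marker files named above):

    def build_candidates(seed_dir, reprovision_marker, nic_attach_marker,
                         azure_devs, data_dir):
        candidates = [seed_dir]
        if reprovision_marker:
            candidates.insert(0, "IMDS")
        elif nic_attach_marker:
            candidates.insert(0, "NIC_ATTACH_MARKER_PRESENT")
        candidates.extend(azure_devs)
        if data_dir:
            candidates.append(data_dir)
        return candidates

    # The nic-attach marker pushes the reprovision-after-nic-attach path
    # to the front of the list:
    assert build_candidates("/seed", False, True, ["/dev/sr0"],
                            "/var/lib/waagent") == [
        "NIC_ATTACH_MARKER_PRESENT", "/seed", "/dev/sr0", "/var/lib/waagent"]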
candidates = [self.seed_dir] if os.path.isfile(REPROVISION_MARKER_FILE): candidates.insert(0, "IMDS") + report_diagnostic_event("Reprovision marker file already present " + "before crawling Azure metadata: %s" % + REPROVISION_MARKER_FILE, + logger_func=LOG.debug) + elif os.path.isfile(REPROVISION_NIC_ATTACH_MARKER_FILE): + candidates.insert(0, "NIC_ATTACH_MARKER_PRESENT") + report_diagnostic_event("Reprovision nic attach marker file " + "already present before crawling Azure " + "metadata: %s" % + REPROVISION_NIC_ATTACH_MARKER_FILE, + logger_func=LOG.debug) candidates.extend(list_possible_azure_ds_devs()) if ddir: candidates.append(ddir) found = None reprovision = False + reprovision_after_nic_attach = False for cdev in candidates: try: if cdev == "IMDS": ret = None reprovision = True + elif cdev == "NIC_ATTACH_MARKER_PRESENT": + ret = None + reprovision_after_nic_attach = True elif cdev.startswith("/dev/"): if util.is_FreeBSD(): ret = util.mount_cb(cdev, load_azure_ds_dir, @@ -435,26 +494,32 @@ class DataSourceAzure(sources.DataSource): except NonAzureDataSource: report_diagnostic_event( - "Did not find Azure data source in %s" % cdev) + "Did not find Azure data source in %s" % cdev, + logger_func=LOG.debug) continue except BrokenAzureDataSource as exc: msg = 'BrokenAzureDataSource: %s' % exc - report_diagnostic_event(msg) + report_diagnostic_event(msg, logger_func=LOG.error) raise sources.InvalidMetaDataException(msg) except util.MountFailedError: - msg = '%s was not mountable' % cdev - report_diagnostic_event(msg) - LOG.warning(msg) + report_diagnostic_event( + '%s was not mountable' % cdev, logger_func=LOG.warning) continue perform_reprovision = reprovision or self._should_reprovision(ret) - if perform_reprovision: + perform_reprovision_after_nic_attach = ( + reprovision_after_nic_attach or + self._should_reprovision_after_nic_attach(ret)) + + if perform_reprovision or perform_reprovision_after_nic_attach: if util.is_FreeBSD(): msg = "Free BSD is not supported for PPS VMs" - LOG.error(msg) - report_diagnostic_event(msg) + report_diagnostic_event(msg, logger_func=LOG.error) raise sources.InvalidMetaDataException(msg) + if perform_reprovision_after_nic_attach: + self._wait_for_all_nics_ready() ret = self._reprovision() + imds_md = get_metadata_from_imds( self.fallback_interface, retries=10) (md, userdata_raw, cfg, files) = ret @@ -467,26 +532,29 @@ class DataSourceAzure(sources.DataSource): 'userdata_raw': userdata_raw}) found = cdev - LOG.debug("found datasource in %s", cdev) + report_diagnostic_event( + 'found datasource in %s' % cdev, logger_func=LOG.debug) break if not found: msg = 'No Azure metadata found' - report_diagnostic_event(msg) + report_diagnostic_event(msg, logger_func=LOG.error) raise sources.InvalidMetaDataException(msg) if found == ddir: - LOG.debug("using files cached in %s", ddir) + report_diagnostic_event( + "using files cached in %s" % ddir, logger_func=LOG.debug) seed = _get_random_seed() if seed: crawled_data['metadata']['random_seed'] = seed crawled_data['metadata']['instance-id'] = self._iid() - if perform_reprovision: + if perform_reprovision or perform_reprovision_after_nic_attach: LOG.info("Reporting ready to Azure after getting ReprovisionData") - use_cached_ephemeral = (net.is_up(self.fallback_interface) and - getattr(self, '_ephemeral_dhcp_ctx', None)) + use_cached_ephemeral = ( + self.distro.networking.is_up(self.fallback_interface) and + getattr(self, '_ephemeral_dhcp_ctx', None)) if use_cached_ephemeral: 
self._report_ready(lease=self._ephemeral_dhcp_ctx.lease) self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral @@ -497,7 +565,8 @@ class DataSourceAzure(sources.DataSource): self._report_ready(lease=lease) except Exception as e: report_diagnostic_event( - "exception while reporting ready: %s" % e) + "exception while reporting ready: %s" % e, + logger_func=LOG.error) raise return crawled_data @@ -529,14 +598,21 @@ class DataSourceAzure(sources.DataSource): except Exception as e: LOG.warning("Failed to get system information: %s", e) + self.distro.networking.blacklist_drivers = BLACKLIST_DRIVERS + try: crawled_data = util.log_time( logfunc=LOG.debug, msg='Crawl of metadata service', func=self.crawl_metadata ) - except sources.InvalidMetaDataException as e: - LOG.warning('Could not crawl Azure metadata: %s', e) + except Exception as e: + report_diagnostic_event( + 'Could not crawl Azure metadata: %s' % e, + logger_func=LOG.error) + self._report_failure( + description=DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE) return False + if (self.distro and self.distro.name == 'ubuntu' and self.ds_cfg.get('apply_network_config')): maybe_remove_ubuntu_network_config_scripts() @@ -561,6 +637,38 @@ class DataSourceAzure(sources.DataSource): def device_name_to_device(self, name): return self.ds_cfg['disk_aliases'].get(name) + @azure_ds_telemetry_reporter + def get_public_ssh_keys(self): + """ + Try to get the ssh keys from IMDS first, and if that fails + (i.e. IMDS is unavailable) then fallback to getting the ssh + keys from OVF. + + The benefit to getting keys from IMDS is a large performance + advantage, so this is a strong preference. But we must keep + OVF as a second option for environments that don't have IMDS. + """ + LOG.debug('Retrieving public SSH keys') + ssh_keys = [] + try: + ssh_keys = [ + public_key['keyData'] + for public_key + in self.metadata['imds']['compute']['publicKeys'] + ] + LOG.debug('Retrieved SSH keys from IMDS') + except KeyError: + log_msg = 'Unable to get keys from IMDS, falling back to OVF' + report_diagnostic_event(log_msg, logger_func=LOG.debug) + try: + ssh_keys = self.metadata['public-keys'] + LOG.debug('Retrieved keys from OVF') + except KeyError: + log_msg = 'No keys available from OVF' + report_diagnostic_event(log_msg, logger_func=LOG.debug) + + return ssh_keys + def get_config_obj(self): return self.cfg @@ -571,7 +679,7 @@ class DataSourceAzure(sources.DataSource): def _iid(self, previous=None): prev_iid_path = os.path.join( self.paths.get_cpath('data'), 'instance-id') - iid = util.read_dmi_data('system-uuid') + iid = dmi.read_dmi_data('system-uuid') if os.path.exists(prev_iid_path): previous = util.load_file(prev_iid_path).strip() if is_byte_swapped(previous, iid): @@ -592,10 +700,293 @@ class DataSourceAzure(sources.DataSource): LOG.debug("negotiating already done for %s", self.get_instance_id()) + @azure_ds_telemetry_reporter + def _wait_for_nic_detach(self, nl_sock): + """Use the netlink socket provided to wait for nic detach event. + NOTE: The function doesn't close the socket. The caller owns closing + the socket and disposing it safely. + """ + try: + ifname = None + + # Preprovisioned VM will only have one NIC, and it gets + # detached immediately after deployment. + with events.ReportEventStack( + name="wait-for-nic-detach", + description=("wait for nic detach"), + parent=azure_ds_reporter): + ifname = netlink.wait_for_nic_detach_event(nl_sock) + if ifname is None: + msg = ("Preprovisioned nic not detached as expected. 
" + "Proceeding without failing.") + report_diagnostic_event(msg, logger_func=LOG.warning) + else: + report_diagnostic_event("The preprovisioned nic %s is detached" + % ifname, logger_func=LOG.warning) + path = REPROVISION_NIC_DETACHED_MARKER_FILE + LOG.info("Creating a marker file for nic detached: %s", path) + util.write_file(path, "{pid}: {time}\n".format( + pid=os.getpid(), time=time())) + except AssertionError as error: + report_diagnostic_event(error, logger_func=LOG.error) + raise + + @azure_ds_telemetry_reporter + def wait_for_link_up(self, ifname): + """In cases where the link state is still showing down after a nic is + hot-attached, we can attempt to bring it up by forcing the hv_netvsc + drivers to query the link state by unbinding and then binding the + device. This function attempts infinitely until the link is up, + because we cannot proceed further until we have a stable link.""" + + if self.distro.networking.try_set_link_up(ifname): + report_diagnostic_event("The link %s is already up." % ifname, + logger_func=LOG.info) + return + + LOG.info("Attempting to bring %s up", ifname) + + attempts = 0 + while True: + + LOG.info("Unbinding and binding the interface %s", ifname) + devicename = net.read_sys_net(ifname, + 'device/device_id').strip('{}') + util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/unbind', + devicename) + util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/bind', + devicename) + + attempts = attempts + 1 + if self.distro.networking.try_set_link_up(ifname): + msg = "The link %s is up after %s attempts" % (ifname, + attempts) + report_diagnostic_event(msg, logger_func=LOG.info) + return + + sleep_duration = 1 + msg = ("Link is not up after %d attempts with %d seconds sleep " + "between attempts." % (attempts, sleep_duration)) + + if attempts % 10 == 0: + report_diagnostic_event(msg, logger_func=LOG.info) + else: + LOG.info(msg) + + sleep(sleep_duration) + + @azure_ds_telemetry_reporter + def _create_report_ready_marker(self): + path = REPORTED_READY_MARKER_FILE + LOG.info( + "Creating a marker file to report ready: %s", path) + util.write_file(path, "{pid}: {time}\n".format( + pid=os.getpid(), time=time())) + report_diagnostic_event( + 'Successfully created reported ready marker file ' + 'while in the preprovisioning pool.', + logger_func=LOG.debug) + + @azure_ds_telemetry_reporter + def _report_ready_if_needed(self): + """Report ready to the platform if the marker file is not present, + and create the marker file. + """ + have_not_reported_ready = ( + not os.path.isfile(REPORTED_READY_MARKER_FILE)) + + if have_not_reported_ready: + report_diagnostic_event("Reporting ready before nic detach", + logger_func=LOG.info) + try: + with EphemeralDHCPv4WithReporting(azure_ds_reporter) as lease: + self._report_ready(lease=lease) + except Exception as e: + report_diagnostic_event("Exception reporting ready during " + "preprovisioning before nic detach: %s" + % e, logger_func=LOG.error) + raise + self._create_report_ready_marker() + else: + report_diagnostic_event("Already reported ready before nic detach." + " The marker file already exists: %s" % + REPORTED_READY_MARKER_FILE, + logger_func=LOG.error) + + @azure_ds_telemetry_reporter + def _check_if_nic_is_primary(self, ifname): + """Check if a given interface is the primary nic or not. If it is the + primary nic, then we also get the expected total nic count from IMDS. + IMDS will process the request and send a response only for primary NIC. 
+ """ + is_primary = False + expected_nic_count = -1 + imds_md = None + + # For now, only a VM's primary NIC can contact IMDS and WireServer. If + # DHCP fails for a NIC, we have no mechanism to determine if the NIC is + # primary or secondary. In this case, the desired behavior is to fail + # VM provisioning if there is any DHCP failure when trying to determine + # the primary NIC. + try: + with events.ReportEventStack( + name="obtain-dhcp-lease", + description=("obtain dhcp lease for %s when attempting to " + "determine primary NIC during reprovision of " + "a pre-provisioned VM" % ifname), + parent=azure_ds_reporter): + dhcp_ctx = EphemeralDHCPv4( + iface=ifname, + dhcp_log_func=dhcp_log_cb) + dhcp_ctx.obtain_lease() + except Exception as e: + report_diagnostic_event("Giving up. Failed to obtain dhcp lease " + "for %s when attempting to determine " + "primary NIC during reprovision due to %s" + % (ifname, e), logger_func=LOG.error) + raise + + # Primary nic detection will be optimized in the future. The fact that + # primary nic is being attached first helps here. Otherwise each nic + # could add several seconds of delay. + try: + imds_md = get_metadata_from_imds( + ifname, + 5, + metadata_type.network) + except Exception as e: + LOG.warning( + "Failed to get network metadata using nic %s. Attempt to " + "contact IMDS failed with error %s. Assuming this is not the " + "primary nic.", ifname, e) + finally: + # If we are not the primary nic, then clean the dhcp context. + if imds_md is None: + dhcp_ctx.clean_network() + + if imds_md is not None: + # Only primary NIC will get a response from IMDS. + LOG.info("%s is the primary nic", ifname) + is_primary = True + + # If primary, set ephemeral dhcp ctx so we can report ready + self._ephemeral_dhcp_ctx = dhcp_ctx + + # Set the expected nic count based on the response received. + expected_nic_count = len( + imds_md['interface']) + report_diagnostic_event("Expected nic count: %d" % + expected_nic_count, logger_func=LOG.info) + + return is_primary, expected_nic_count + + @azure_ds_telemetry_reporter + def _wait_for_hot_attached_nics(self, nl_sock): + """Wait until all the expected nics for the vm are hot-attached. + The expected nic count is obtained by requesting the network metadata + from IMDS. + """ + LOG.info("Waiting for nics to be hot-attached") + try: + # Wait for nics to be attached one at a time, until we know for + # sure that all nics have been attached. + nics_found = [] + primary_nic_found = False + expected_nic_count = -1 + + # Wait for netlink nic attach events. After the first nic is + # attached, we are already in the customer vm deployment path and + # so eerything from then on should happen fast and avoid + # unnecessary delays wherever possible. + while True: + ifname = None + with events.ReportEventStack( + name="wait-for-nic-attach", + description=("wait for nic attach after %d nics have " + "been attached" % len(nics_found)), + parent=azure_ds_reporter): + ifname = netlink.wait_for_nic_attach_event(nl_sock, + nics_found) + + # wait_for_nic_attach_event guarantees that ifname it not None + nics_found.append(ifname) + report_diagnostic_event("Detected nic %s attached." % ifname, + logger_func=LOG.info) + + # Attempt to bring the interface's operating state to + # UP in case it is not already. + self.wait_for_link_up(ifname) + + # If primary nic is not found, check if this is it. The + # platform will attach the primary nic first so we + # won't be in primary_nic_found = false state for long. 
+ if not primary_nic_found: + LOG.info("Checking if %s is the primary nic", + ifname) + (primary_nic_found, expected_nic_count) = ( + self._check_if_nic_is_primary(ifname)) + + # Exit criteria: check if we've discovered all nics + if (expected_nic_count != -1 + and len(nics_found) >= expected_nic_count): + LOG.info("Found all the nics for this VM.") + break + + except AssertionError as error: + report_diagnostic_event(error, logger_func=LOG.error) + + @azure_ds_telemetry_reporter + def _wait_for_all_nics_ready(self): + """Wait for nic(s) to be hot-attached. There may be multiple nics + depending on the customer request. + But only primary nic would be able to communicate with wireserver + and IMDS. So we detect and save the primary nic to be used later. + """ + + nl_sock = None + try: + nl_sock = netlink.create_bound_netlink_socket() + + report_ready_marker_present = bool( + os.path.isfile(REPORTED_READY_MARKER_FILE)) + + # Report ready if the marker file is not already present. + # The nic of the preprovisioned vm gets hot-detached as soon as + # we report ready. So no need to save the dhcp context. + self._report_ready_if_needed() + + has_nic_been_detached = bool( + os.path.isfile(REPROVISION_NIC_DETACHED_MARKER_FILE)) + + if not has_nic_been_detached: + LOG.info("NIC has not been detached yet.") + self._wait_for_nic_detach(nl_sock) + + # If we know that the preprovisioned nic has been detached, and we + # still have a fallback nic, then it means the VM must have + # rebooted as part of customer assignment, and all the nics have + # already been attached by the Azure platform. So there is no need + # to wait for nics to be hot-attached. + if not self.fallback_interface: + self._wait_for_hot_attached_nics(nl_sock) + else: + report_diagnostic_event("Skipping waiting for nic attach " + "because we already have a fallback " + "interface. Report Ready marker " + "present before detaching nics: %s" % + report_ready_marker_present, + logger_func=LOG.info) + except netlink.NetlinkCreateSocketError as e: + report_diagnostic_event(e, logger_func=LOG.warning) + raise + finally: + if nl_sock: + nl_sock.close() + def _poll_imds(self): """Poll IMDS for the new provisioning data until we get a valid response. Then return the returned JSON object.""" - url = IMDS_URL + "reprovisiondata?api-version=2017-04-02" + url = metadata_type.reprovisiondata.value headers = {"Metadata": "true"} nl_sock = None report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE)) @@ -611,16 +1002,14 @@ class DataSourceAzure(sources.DataSource): if self.imds_poll_counter == self.imds_logging_threshold: # Reducing the logging frequency as we are polling IMDS self.imds_logging_threshold *= 2 - LOG.debug("Call to IMDS with arguments %s failed " - "with status code %s after %s retries", - msg, exception.code, self.imds_poll_counter) LOG.debug("Backing off logging threshold for the same " "exception to %d", self.imds_logging_threshold) report_diagnostic_event("poll IMDS with %s failed. " "Exception: %s and code: %s" % (msg, exception.cause, - exception.code)) + exception.code), + logger_func=LOG.debug) self.imds_poll_counter += 1 return True else: @@ -629,24 +1018,41 @@ class DataSourceAzure(sources.DataSource): report_diagnostic_event("poll IMDS with %s failed. 
" "Exception: %s and code: %s" % (msg, exception.cause, - exception.code)) + exception.code), + logger_func=LOG.warning) return False - LOG.debug("poll IMDS failed with an unexpected exception: %s", - exception) - return False + report_diagnostic_event( + "poll IMDS failed with an " + "unexpected exception: %s" % exception, + logger_func=LOG.warning) + return False + + # When the interface is hot-attached, we would have already + # done dhcp and set the dhcp context. In that case, skip + # the attempt to do dhcp. + is_ephemeral_ctx_present = self._ephemeral_dhcp_ctx is not None + msg = ("Unexpected error. Dhcp context is not expected to be already " + "set when we need to wait for vnet switch") + if is_ephemeral_ctx_present and report_ready: + report_diagnostic_event(msg, logger_func=LOG.error) + raise RuntimeError(msg) - LOG.debug("Wait for vnetswitch to happen") while True: try: - # Save our EphemeralDHCPv4 context to avoid repeated dhcp - with events.ReportEventStack( - name="obtain-dhcp-lease", - description="obtain dhcp lease", - parent=azure_ds_reporter): - self._ephemeral_dhcp_ctx = EphemeralDHCPv4( - dhcp_log_func=dhcp_log_cb) - lease = self._ephemeral_dhcp_ctx.obtain_lease() + # Since is_ephemeral_ctx_present is set only once, this ensures + # that with regular reprovisioning, dhcp is always done every + # time the loop runs. + if not is_ephemeral_ctx_present: + # Save our EphemeralDHCPv4 context to avoid repeated dhcp + # later when we report ready + with events.ReportEventStack( + name="obtain-dhcp-lease", + description="obtain dhcp lease", + parent=azure_ds_reporter): + self._ephemeral_dhcp_ctx = EphemeralDHCPv4( + dhcp_log_func=dhcp_log_cb) + lease = self._ephemeral_dhcp_ctx.obtain_lease() if vnet_switched: dhcp_attempts += 1 @@ -654,19 +1060,24 @@ class DataSourceAzure(sources.DataSource): try: nl_sock = netlink.create_bound_netlink_socket() except netlink.NetlinkCreateSocketError as e: - report_diagnostic_event(e) - LOG.warning(e) + report_diagnostic_event( + 'Failed to create bound netlink socket: %s' % e, + logger_func=LOG.warning) self._ephemeral_dhcp_ctx.clean_network() break - path = REPORTED_READY_MARKER_FILE - LOG.info( - "Creating a marker file to report ready: %s", path) - util.write_file(path, "{pid}: {time}\n".format( - pid=os.getpid(), time=time())) - self._report_ready(lease=lease) + report_ready_succeeded = self._report_ready(lease=lease) + if not report_ready_succeeded: + msg = ('Failed reporting ready while in ' + 'the preprovisioning pool.') + report_diagnostic_event(msg, logger_func=LOG.error) + self._ephemeral_dhcp_ctx.clean_network() + raise sources.InvalidMetaDataException(msg) + + self._create_report_ready_marker() report_ready = False + LOG.debug("Wait for vnetswitch to happen") with events.ReportEventStack( name="wait-for-media-disconnect-connect", description="wait for vnet switch", @@ -674,9 +1085,10 @@ class DataSourceAzure(sources.DataSource): try: netlink.wait_for_media_disconnect_connect( nl_sock, lease['interface']) - except AssertionError as error: - report_diagnostic_event(error) - LOG.error(error) + except AssertionError as e: + report_diagnostic_event( + 'Error while waiting for vnet switch: %s' % e, + logger_func=LOG.error) break vnet_switched = True @@ -702,21 +1114,113 @@ class DataSourceAzure(sources.DataSource): if vnet_switched: report_diagnostic_event("attempted dhcp %d times after reuse" % - dhcp_attempts) + dhcp_attempts, + logger_func=LOG.debug) report_diagnostic_event("polled imds %d times after reuse" % - self.imds_poll_counter) 
+ self.imds_poll_counter, + logger_func=LOG.debug) return return_val @azure_ds_telemetry_reporter - def _report_ready(self, lease): - """Tells the fabric provisioning has completed """ + def _report_failure(self, description=None) -> bool: + """Tells the Azure fabric that provisioning has failed. + + @param description: A description of the error encountered. + @return: The success status of sending the failure signal. + """ + unknown_245_key = 'unknown-245' + + try: + if (self.distro.networking.is_up(self.fallback_interface) and + getattr(self, '_ephemeral_dhcp_ctx', None) and + getattr(self._ephemeral_dhcp_ctx, 'lease', None) and + unknown_245_key in self._ephemeral_dhcp_ctx.lease): + report_diagnostic_event( + 'Using cached ephemeral dhcp context ' + 'to report failure to Azure', logger_func=LOG.debug) + report_failure_to_fabric( + dhcp_opts=self._ephemeral_dhcp_ctx.lease[unknown_245_key], + description=description) + self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral + return True + except Exception as e: + report_diagnostic_event( + 'Failed to report failure using ' + 'cached ephemeral dhcp context: %s' % e, + logger_func=LOG.error) + + try: + report_diagnostic_event( + 'Using new ephemeral dhcp to report failure to Azure', + logger_func=LOG.debug) + with EphemeralDHCPv4WithReporting(azure_ds_reporter) as lease: + report_failure_to_fabric( + dhcp_opts=lease[unknown_245_key], + description=description) + return True + except Exception as e: + report_diagnostic_event( + 'Failed to report failure using new ephemeral dhcp: %s' % e, + logger_func=LOG.debug) + + try: + report_diagnostic_event( + 'Using fallback lease to report failure to Azure') + report_failure_to_fabric( + fallback_lease_file=self.dhclient_lease_file, + description=description) + return True + except Exception as e: + report_diagnostic_event( + 'Failed to report failure using fallback lease: %s' % e, + logger_func=LOG.debug) + + return False + + def _report_ready(self, lease: dict) -> bool: + """Tells the fabric provisioning has completed. + + @param lease: dhcp lease to use for sending the ready signal. + @return: The success status of sending the ready signal. + """ try: get_metadata_from_fabric(None, lease['unknown-245']) - except Exception: - LOG.warning( - "Error communicating with Azure fabric; You may experience." - "connectivity issues.", exc_info=True) + return True + except Exception as e: + report_diagnostic_event( + "Error communicating with Azure fabric; You may experience " + "connectivity issues: %s" % e, logger_func=LOG.warning) + return False + + def _should_reprovision_after_nic_attach(self, candidate_metadata) -> bool: + """Whether or not we should wait for nic attach and then poll + IMDS for reprovisioning data. Also sets a marker file to poll IMDS. + + The marker file is used for the following scenario: the VM boots into + wait for nic attach, which we expect to be proceeding infinitely until + the nic is attached. If for whatever reason the platform moves us to a + new host (for instance a hardware issue), we need to keep waiting. + However, since the VM reports ready to the Fabric, we will not attach + the ISO, thus cloud-init needs to have a way of knowing that it should + jump back into the waiting mode in order to retrieve the ovf_env. + + @param candidate_metadata: Metadata obtained from reading ovf-env. + @return: Whether to reprovision after waiting for nics to be attached. 
+ """ + if not candidate_metadata: + return False + (_md, _userdata_raw, cfg, _files) = candidate_metadata + path = REPROVISION_NIC_ATTACH_MARKER_FILE + if (cfg.get('PreprovisionedVMType', None) == "Savable" or + os.path.isfile(path)): + if not os.path.isfile(path): + LOG.info("Creating a marker file to wait for nic attach: %s", + path) + util.write_file(path, "{pid}: {time}\n".format( + pid=os.getpid(), time=time())) + return True + return False def _should_reprovision(self, ret): """Whether or not we should poll IMDS for reprovisioning data. @@ -734,6 +1238,7 @@ class DataSourceAzure(sources.DataSource): (_md, _userdata_raw, cfg, _files) = ret path = REPROVISION_MARKER_FILE if (cfg.get('PreprovisionedVm') is True or + cfg.get('PreprovisionedVMType', None) == 'Running' or os.path.isfile(path)): if not os.path.isfile(path): LOG.info("Creating a marker file to poll imds: %s", @@ -764,7 +1269,22 @@ class DataSourceAzure(sources.DataSource): if self.ds_cfg['agent_command'] == AGENT_START_BUILTIN: self.bounce_network_with_azure_hostname() - pubkey_info = self.cfg.get('_pubkeys', None) + pubkey_info = None + try: + public_keys = self.metadata['imds']['compute']['publicKeys'] + LOG.debug( + 'Successfully retrieved %s key(s) from IMDS', + len(public_keys) + if public_keys is not None + else 0 + ) + except KeyError: + LOG.debug( + 'Unable to retrieve SSH keys from IMDS during ' + 'negotiation, falling back to OVF' + ) + pubkey_info = self.cfg.get('_pubkeys', None) + metadata_func = partial(get_metadata_from_fabric, fallback_lease_file=self. dhclient_lease_file, @@ -779,14 +1299,13 @@ class DataSourceAzure(sources.DataSource): except Exception as e: report_diagnostic_event( "Error communicating with Azure fabric; You may experience " - "connectivity issues: %s" % e) - LOG.warning( - "Error communicating with Azure fabric; You may experience " - "connectivity issues.", exc_info=True) + "connectivity issues: %s" % e, logger_func=LOG.warning) return False util.del_file(REPORTED_READY_MARKER_FILE) util.del_file(REPROVISION_MARKER_FILE) + util.del_file(REPROVISION_NIC_ATTACH_MARKER_FILE) + util.del_file(REPROVISION_NIC_DETACHED_MARKER_FILE) return fabric_data @azure_ds_telemetry_reporter @@ -947,9 +1466,10 @@ def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120, log_pre="Azure ephemeral disk: ") if missing: - LOG.warning("ephemeral device '%s' did" - " not appear after %d seconds.", - devpath, maxwait) + report_diagnostic_event( + "ephemeral device '%s' did not appear after %d seconds." 
% + (devpath, maxwait), + logger_func=LOG.warning) return result = False @@ -1034,7 +1554,9 @@ def pubkeys_from_crt_files(flist): errors.append(fname) if errors: - LOG.warning("failed to convert the crt files to pubkey: %s", errors) + report_diagnostic_event( + "failed to convert the crt files to pubkey: %s" % errors, + logger_func=LOG.warning) return pubkeys @@ -1146,7 +1668,7 @@ def read_azure_ovf(contents): dom = minidom.parseString(contents) except Exception as e: error_str = "Invalid ovf-env.xml: %s" % e - report_diagnostic_event(error_str) + report_diagnostic_event(error_str, logger_func=LOG.warning) raise BrokenAzureDataSource(error_str) from e results = find_child(dom.documentElement, @@ -1231,7 +1753,7 @@ def read_azure_ovf(contents): if password: defuser['lock_passwd'] = False if DEF_PASSWD_REDACTION != password: - defuser['passwd'] = encrypt_pass(password) + defuser['passwd'] = cfg['password'] = encrypt_pass(password) if defuser: cfg['system_info'] = {'default_user': defuser} @@ -1239,34 +1761,109 @@ def read_azure_ovf(contents): if 'ssh_pwauth' not in cfg and password: cfg['ssh_pwauth'] = True - cfg['PreprovisionedVm'] = _extract_preprovisioned_vm_setting(dom) + preprovisioning_cfg = _get_preprovisioning_cfgs(dom) + cfg = util.mergemanydict([cfg, preprovisioning_cfg]) return (md, ud, cfg) @azure_ds_telemetry_reporter -def _extract_preprovisioned_vm_setting(dom): - """Read the preprovision flag from the ovf. It should not - exist unless true.""" +def _get_preprovisioning_cfgs(dom): + """Read the preprovisioning related flags from ovf and populates a dict + with the info. + + Two flags are in use today: PreprovisionedVm bool and + PreprovisionedVMType enum. In the long term, the PreprovisionedVm bool + will be deprecated in favor of PreprovisionedVMType string/enum. + + Only these combinations of values are possible today: + - PreprovisionedVm=True and PreprovisionedVMType=Running + - PreprovisionedVm=False and PreprovisionedVMType=Savable + - PreprovisionedVm is missing and PreprovisionedVMType=Running/Savable + - PreprovisionedVm=False and PreprovisionedVMType is missing + + More specifically, this will never happen: + - PreprovisionedVm=True and PreprovisionedVMType=Savable + """ + cfg = { + "PreprovisionedVm": False, + "PreprovisionedVMType": None + } + platform_settings_section = find_child( dom.documentElement, lambda n: n.localName == "PlatformSettingsSection") if not platform_settings_section or len(platform_settings_section) == 0: LOG.debug("PlatformSettingsSection not found") - return False + return cfg platform_settings = find_child( platform_settings_section[0], lambda n: n.localName == "PlatformSettings") if not platform_settings or len(platform_settings) == 0: LOG.debug("PlatformSettings not found") - return False - preprovisionedVm = find_child( + return cfg + + # Read the PreprovisionedVm bool flag. This should be deprecated when the + # platform has removed PreprovisionedVm and only surfaces + # PreprovisionedVMType. + cfg["PreprovisionedVm"] = _get_preprovisionedvm_cfg_value( + platform_settings) + + cfg["PreprovisionedVMType"] = _get_preprovisionedvmtype_cfg_value( + platform_settings) + return cfg + + +@azure_ds_telemetry_reporter +def _get_preprovisionedvm_cfg_value(platform_settings): + preprovisionedVm = False + + # Read the PreprovisionedVm bool flag. This should be deprecated when the + # platform has removed PreprovisionedVm and only surfaces + # PreprovisionedVMType. 
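Taken together with ``_should_reprovision`` and ``_should_reprovision_after_nic_attach`` above, the flag combinations enumerated in the docstring reduce to three provisioning paths. A plain-Python restatement (hypothetical helper, for illustration only; the real decisions live in those two methods):

    def expected_path(preprovisioned_vm, preprovisioned_vm_type):
        if preprovisioned_vm_type == "Savable":
            return "wait-for-nic-attach-then-poll-imds"
        if preprovisioned_vm or preprovisioned_vm_type == "Running":
            return "poll-imds"
        return "normal-provisioning"

    assert expected_path(True, "Running") == "poll-imds"
    assert expected_path(False, "Savable") == (
        "wait-for-nic-attach-then-poll-imds")
    assert expected_path(False, None) == "normal-provisioning"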
+ preprovisionedVmVal = find_child( platform_settings[0], lambda n: n.localName == "PreprovisionedVm") - if not preprovisionedVm or len(preprovisionedVm) == 0: + if not preprovisionedVmVal or len(preprovisionedVmVal) == 0: LOG.debug("PreprovisionedVm not found") - return False - return util.translate_bool(preprovisionedVm[0].firstChild.nodeValue) + return preprovisionedVm + preprovisionedVm = util.translate_bool( + preprovisionedVmVal[0].firstChild.nodeValue) + + report_diagnostic_event( + "PreprovisionedVm: %s" % preprovisionedVm, logger_func=LOG.info) + + return preprovisionedVm + + +@azure_ds_telemetry_reporter +def _get_preprovisionedvmtype_cfg_value(platform_settings): + preprovisionedVMType = None + + # Read the PreprovisionedVMType value from the ovf. It can be + # 'Running' or 'Savable' or not exist. This enum value is intended to + # replace PreprovisionedVm bool flag in the long term. + # A Running VM is the same as preprovisioned VMs of today. This is + # equivalent to having PreprovisionedVm=True. + # A Savable VM is one whose nic is hot-detached immediately after it + # reports ready the first time to free up the network resources. + # Once assigned to customer, the customer-requested nics are + # hot-attached to it and reprovision happens like today. + preprovisionedVMTypeVal = find_child( + platform_settings[0], + lambda n: n.localName == "PreprovisionedVMType") + if (not preprovisionedVMTypeVal or len(preprovisionedVMTypeVal) == 0 or + preprovisionedVMTypeVal[0].firstChild is None): + LOG.debug("PreprovisionedVMType not found") + return preprovisionedVMType + + preprovisionedVMType = preprovisionedVMTypeVal[0].firstChild.nodeValue + + report_diagnostic_event( + "PreprovisionedVMType: %s" % preprovisionedVMType, + logger_func=LOG.info) + + return preprovisionedVMType def encrypt_pass(password, salt_id="$6$"): @@ -1338,81 +1935,100 @@ def load_azure_ds_dir(source_dir): return (md, ud, cfg, {'ovf-env.xml': contents}) -def parse_network_config(imds_metadata): +@azure_ds_telemetry_reporter +def parse_network_config(imds_metadata) -> dict: """Convert imds_metadata dictionary to network v2 configuration. - Parses network configuration from imds metadata if present or generate fallback network config excluding mlx4_core devices. @param: imds_metadata: Dict of content read from IMDS network service. @return: Dictionary containing network version 2 standard configuration. """ - with events.ReportEventStack( - name="parse_network_config", - description="", - parent=azure_ds_reporter - ) as evt: - if imds_metadata != sources.UNSET and imds_metadata: - netconfig = {'version': 2, 'ethernets': {}} - LOG.debug('Azure: generating network configuration from IMDS') - network_metadata = imds_metadata['network'] - for idx, intf in enumerate(network_metadata['interface']): - # First IPv4 and/or IPv6 address will be obtained via DHCP. - # Any additional IPs of each type will be set as static - # addresses. 
- nicname = 'eth{idx}'.format(idx=idx) - dhcp_override = {'route-metric': (idx + 1) * 100} - dev_config = {'dhcp4': True, 'dhcp4-overrides': dhcp_override, - 'dhcp6': False} - for addr_type in ('ipv4', 'ipv6'): - addresses = intf.get(addr_type, {}).get('ipAddress', []) - if addr_type == 'ipv4': - default_prefix = '24' - else: - default_prefix = '128' - if addresses: - dev_config['dhcp6'] = True - # non-primary interfaces should have a higher - # route-metric (cost) so default routes prefer - # primary nic due to lower route-metric value - dev_config['dhcp6-overrides'] = dhcp_override - for addr in addresses[1:]: - # Append static address config for ip > 1 - netPrefix = intf[addr_type]['subnet'][0].get( - 'prefix', default_prefix) - privateIp = addr['privateIpAddress'] - if not dev_config.get('addresses'): - dev_config['addresses'] = [] - dev_config['addresses'].append( - '{ip}/{prefix}'.format( - ip=privateIp, prefix=netPrefix)) - if dev_config: - mac = ':'.join(re.findall(r'..', intf['macAddress'])) - dev_config.update({ - 'match': {'macaddress': mac.lower()}, - 'set-name': nicname - }) - # With netvsc, we can get two interfaces that - # share the same MAC, so we need to make sure - # our match condition also contains the driver - driver = device_driver(nicname) - if driver and driver == 'hv_netvsc': - dev_config['match']['driver'] = driver - netconfig['ethernets'][nicname] = dev_config - evt.description = "network config from imds" - else: - blacklist = ['mlx4_core'] - LOG.debug('Azure: generating fallback configuration') - # generate a network config, blacklist picking mlx4_core devs - netconfig = net.generate_fallback_config( - blacklist_drivers=blacklist, config_driver=True) - evt.description = "network config from fallback" - return netconfig + if imds_metadata != sources.UNSET and imds_metadata: + try: + return _generate_network_config_from_imds_metadata(imds_metadata) + except Exception as e: + LOG.error( + 'Failed generating network config ' + 'from IMDS network metadata: %s', str(e)) + try: + return _generate_network_config_from_fallback_config() + except Exception as e: + LOG.error('Failed generating fallback network config: %s', str(e)) + return {} + + +@azure_ds_telemetry_reporter +def _generate_network_config_from_imds_metadata(imds_metadata) -> dict: + """Convert imds_metadata dictionary to network v2 configuration. + Parses network configuration from imds metadata. + + @param: imds_metadata: Dict of content read from IMDS network service. + @return: Dictionary containing network version 2 standard configuration. + """ + netconfig = {'version': 2, 'ethernets': {}} + network_metadata = imds_metadata['network'] + for idx, intf in enumerate(network_metadata['interface']): + # First IPv4 and/or IPv6 address will be obtained via DHCP. + # Any additional IPs of each type will be set as static + # addresses. 
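Two details of the per-interface loop below are worth pinning down with concrete values: the MAC normalization used for the netplan ``match`` stanza, and the rule that only addresses after the first become static entries. A sketch with a made-up IMDS interface entry:

    import re

    intf = {"macAddress": "000D3A047598",
            "ipv4": {"subnet": [{"prefix": "24"}],
                     "ipAddress": [{"privateIpAddress": "10.0.0.4"},
                                   {"privateIpAddress": "10.0.0.5"}]}}

    # MAC normalization for the 'match' stanza:
    mac = ':'.join(re.findall(r'..', intf['macAddress'])).lower()
    assert mac == '00:0d:3a:04:75:98'

    # Only addresses after the first become static entries:
    prefix = intf['ipv4']['subnet'][0].get('prefix', '24')
    static = ['{ip}/{prefix}'.format(ip=a['privateIpAddress'], prefix=prefix)
              for a in intf['ipv4']['ipAddress'][1:]]
    assert static == ['10.0.0.5/24']  # the first address stays on DHCP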
+ nicname = 'eth{idx}'.format(idx=idx) + dhcp_override = {'route-metric': (idx + 1) * 100} + dev_config = {'dhcp4': True, 'dhcp4-overrides': dhcp_override, + 'dhcp6': False} + for addr_type in ('ipv4', 'ipv6'): + addresses = intf.get(addr_type, {}).get('ipAddress', []) + if addr_type == 'ipv4': + default_prefix = '24' + else: + default_prefix = '128' + if addresses: + dev_config['dhcp6'] = True + # non-primary interfaces should have a higher + # route-metric (cost) so default routes prefer + # primary nic due to lower route-metric value + dev_config['dhcp6-overrides'] = dhcp_override + for addr in addresses[1:]: + # Append static address config for ip > 1 + netPrefix = intf[addr_type]['subnet'][0].get( + 'prefix', default_prefix) + privateIp = addr['privateIpAddress'] + if not dev_config.get('addresses'): + dev_config['addresses'] = [] + dev_config['addresses'].append( + '{ip}/{prefix}'.format( + ip=privateIp, prefix=netPrefix)) + if dev_config: + mac = ':'.join(re.findall(r'..', intf['macAddress'])) + dev_config.update({ + 'match': {'macaddress': mac.lower()}, + 'set-name': nicname + }) + # With netvsc, we can get two interfaces that + # share the same MAC, so we need to make sure + # our match condition also contains the driver + driver = device_driver(nicname) + if driver and driver == 'hv_netvsc': + dev_config['match']['driver'] = driver + netconfig['ethernets'][nicname] = dev_config + return netconfig + + +@azure_ds_telemetry_reporter +def _generate_network_config_from_fallback_config() -> dict: + """Generate fallback network config excluding blacklisted devices. + + @return: Dictionary containing network version 2 standard configuration. + """ + return net.generate_fallback_config( + blacklist_drivers=BLACKLIST_DRIVERS, config_driver=True) @azure_ds_telemetry_reporter -def get_metadata_from_imds(fallback_nic, retries): - """Query Azure's network metadata service, returning a dictionary. +def get_metadata_from_imds(fallback_nic, + retries, + md_type=metadata_type.compute): + """Query Azure's instance metadata service, returning a dictionary. If network is not up, setup ephemeral dhcp on fallback_nic to talk to the IMDS. For more info on IMDS: @@ -1427,7 +2043,7 @@ def get_metadata_from_imds(fallback_nic, retries): """ kwargs = {'logfunc': LOG.debug, 'msg': 'Crawl of Azure Instance Metadata Service (IMDS)', - 'func': _get_metadata_from_imds, 'args': (retries,)} + 'func': _get_metadata_from_imds, 'args': (retries, md_type,)} if net.is_up(fallback_nic): return util.log_time(**kwargs) else: @@ -1436,23 +2052,26 @@ def get_metadata_from_imds(fallback_nic, retries): azure_ds_reporter, fallback_nic): return util.log_time(**kwargs) except Exception as e: - report_diagnostic_event("exception while getting metadata: %s" % e) + report_diagnostic_event( + "exception while getting metadata: %s" % e, + logger_func=LOG.warning) raise @azure_ds_telemetry_reporter -def _get_metadata_from_imds(retries): +def _get_metadata_from_imds(retries, md_type=metadata_type.compute): - url = IMDS_URL + "instance?api-version=2017-12-01" + url = md_type.value headers = {"Metadata": "true"} try: response = readurl( url, timeout=IMDS_TIMEOUT_IN_SECONDS, headers=headers, retries=retries, exception_cb=retry_on_url_exc) except Exception as e: - msg = 'Ignoring IMDS instance metadata: %s' % e - report_diagnostic_event(msg) - LOG.debug(msg) + report_diagnostic_event( + 'Ignoring IMDS instance metadata. 
' + 'Get metadata from IMDS failed: %s' % e, + logger_func=LOG.warning) return {} try: from json.decoder import JSONDecodeError @@ -1463,9 +2082,10 @@ def _get_metadata_from_imds(retries): try: return util.load_json(str(response)) except json_decode_error as e: - report_diagnostic_event('non-json imds response' % e) - LOG.warning( - 'Ignoring non-json IMDS instance metadata: %s', str(response)) + report_diagnostic_event( + 'Ignoring non-json IMDS instance metadata response: %s. ' + 'Loading non-json IMDS response failed: %s' % (str(response), e), + logger_func=LOG.warning) return {} @@ -1513,13 +2133,12 @@ def _is_platform_viable(seed_dir): description="found azure asset tag", parent=azure_ds_reporter ) as evt: - asset_tag = util.read_dmi_data('chassis-asset-tag') + asset_tag = dmi.read_dmi_data('chassis-asset-tag') if asset_tag == AZURE_CHASSIS_ASSET_TAG: return True msg = "Non-Azure DMI asset tag '%s' discovered." % asset_tag - LOG.debug(msg) evt.description = msg - report_diagnostic_event(msg) + report_diagnostic_event(msg, logger_func=LOG.debug) if os.path.exists(os.path.join(seed_dir, 'ovf-env.xml')): return True return False diff --git a/cloudinit/sources/DataSourceBigstep.py b/cloudinit/sources/DataSourceBigstep.py index 52fff20a..63435279 100644 --- a/cloudinit/sources/DataSourceBigstep.py +++ b/cloudinit/sources/DataSourceBigstep.py @@ -7,13 +7,10 @@ import errno import json -from cloudinit import log as logging from cloudinit import sources from cloudinit import url_helper from cloudinit import util -LOG = logging.getLogger(__name__) - class DataSourceBigstep(sources.DataSource): diff --git a/cloudinit/sources/DataSourceCloudSigma.py b/cloudinit/sources/DataSourceCloudSigma.py index df88f677..f63baf74 100644 --- a/cloudinit/sources/DataSourceCloudSigma.py +++ b/cloudinit/sources/DataSourceCloudSigma.py @@ -9,9 +9,9 @@ import re from cloudinit.cs_utils import Cepko, SERIAL_PORT +from cloudinit import dmi from cloudinit import log as logging from cloudinit import sources -from cloudinit import util LOG = logging.getLogger(__name__) @@ -38,7 +38,7 @@ class DataSourceCloudSigma(sources.DataSource): """ LOG.debug("determining hypervisor product name via dmi data") - sys_product_name = util.read_dmi_data("system-product-name") + sys_product_name = dmi.read_dmi_data("system-product-name") if not sys_product_name: LOG.debug("system-product-name not available in dmi data") return False diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 1d09c12a..1930a509 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -11,6 +11,7 @@ import os import time +from cloudinit import dmi from cloudinit import ec2_utils as ec2 from cloudinit import log as logging from cloudinit import net @@ -699,26 +700,26 @@ def _collect_platform_data(): uuid = util.load_file("/sys/hypervisor/uuid").strip() data['uuid_source'] = 'hypervisor' except Exception: - uuid = util.read_dmi_data('system-uuid') + uuid = dmi.read_dmi_data('system-uuid') data['uuid_source'] = 'dmi' if uuid is None: uuid = '' data['uuid'] = uuid.lower() - serial = util.read_dmi_data('system-serial-number') + serial = dmi.read_dmi_data('system-serial-number') if serial is None: serial = '' data['serial'] = serial.lower() - asset_tag = util.read_dmi_data('chassis-asset-tag') + asset_tag = dmi.read_dmi_data('chassis-asset-tag') if asset_tag is None: asset_tag = '' data['asset_tag'] = asset_tag.lower() - vendor = util.read_dmi_data('system-manufacturer') + vendor = 
dmi.read_dmi_data('system-manufacturer') data['vendor'] = (vendor if vendor else '').lower() return data diff --git a/cloudinit/sources/DataSourceExoscale.py b/cloudinit/sources/DataSourceExoscale.py index d59aefd1..adee6d79 100644 --- a/cloudinit/sources/DataSourceExoscale.py +++ b/cloudinit/sources/DataSourceExoscale.py @@ -3,6 +3,7 @@ # # This file is part of cloud-init. See LICENSE file for license information. +from cloudinit import dmi from cloudinit import ec2_utils as ec2 from cloudinit import log as logging from cloudinit import sources @@ -135,7 +136,7 @@ class DataSourceExoscale(sources.DataSource): return self.extra_config def _is_platform_viable(self): - return util.read_dmi_data('system-product-name').startswith( + return dmi.read_dmi_data('system-product-name').startswith( EXOSCALE_DMI_NAME) diff --git a/cloudinit/sources/DataSourceGCE.py b/cloudinit/sources/DataSourceGCE.py index 0ec5f6ec..746caddb 100644 --- a/cloudinit/sources/DataSourceGCE.py +++ b/cloudinit/sources/DataSourceGCE.py @@ -7,6 +7,7 @@ import json from base64 import b64decode +from cloudinit import dmi from cloudinit.distros import ug_util from cloudinit import log as logging from cloudinit import sources @@ -248,12 +249,12 @@ def read_md(address=None, platform_check=True): def platform_reports_gce(): - pname = util.read_dmi_data('system-product-name') or "N/A" + pname = dmi.read_dmi_data('system-product-name') or "N/A" if pname == "Google Compute Engine": return True # system-product-name is not always guaranteed (LP: #1674861) - serial = util.read_dmi_data('system-serial-number') or "N/A" + serial = dmi.read_dmi_data('system-serial-number') or "N/A" if serial.startswith("GoogleCloud-"): return True diff --git a/cloudinit/sources/DataSourceHetzner.py b/cloudinit/sources/DataSourceHetzner.py index a86035e0..c7c88dd7 100644 --- a/cloudinit/sources/DataSourceHetzner.py +++ b/cloudinit/sources/DataSourceHetzner.py @@ -3,9 +3,10 @@ # # This file is part of cloud-init. See LICENSE file for license information. # -"""Hetzner Cloud API Documentation. +"""Hetzner Cloud API Documentation https://docs.hetzner.cloud/""" +from cloudinit import dmi from cloudinit import log as logging from cloudinit import net as cloudnet from cloudinit import sources @@ -46,9 +47,12 @@ class DataSourceHetzner(sources.DataSource): self._network_config = None self.dsmode = sources.DSMODE_NETWORK - def get_data(self): - if not on_hetzner(): + def _get_data(self): + (on_hetzner, serial) = get_hcloud_data() + + if not on_hetzner: return False + nic = cloudnet.find_fallback_nic() with cloudnet.EphemeralIPv4Network(nic, "169.254.0.1", 16, "169.254.255.255"): @@ -78,8 +82,18 @@ class DataSourceHetzner(sources.DataSource): self.metadata['public-keys'] = md.get('public-keys', None) self.vendordata_raw = md.get("vendor_data", None) + # instance-id and serial from SMBIOS should be identical + if self.get_instance_id() != serial: + raise RuntimeError( + "SMBIOS serial does not match instance ID from metadata" + ) + return True + def check_instance_id(self, sys_cfg): + return sources.instance_id_matches_system_uuid( + self.get_instance_id(), 'system-serial-number') + @property def network_config(self): """Configure the networking. 
This needs to be done each boot, since @@ -99,8 +113,18 @@ class DataSourceHetzner(sources.DataSource): return self._network_config -def on_hetzner(): - return util.read_dmi_data('system-manufacturer') == "Hetzner" +def get_hcloud_data(): + vendor_name = dmi.read_dmi_data('system-manufacturer') + if vendor_name != "Hetzner": + return (False, None) + + serial = dmi.read_dmi_data("system-serial-number") + if serial: + LOG.debug("Running on Hetzner Cloud: serial=%s", serial) + else: + raise RuntimeError("Hetzner Cloud detected, but no serial found") + + return (True, serial) # Used to match classes to dependencies diff --git a/cloudinit/sources/DataSourceNoCloud.py b/cloudinit/sources/DataSourceNoCloud.py index e408d730..a126aad3 100644 --- a/cloudinit/sources/DataSourceNoCloud.py +++ b/cloudinit/sources/DataSourceNoCloud.py @@ -11,6 +11,7 @@ import errno import os +from cloudinit import dmi from cloudinit import log as logging from cloudinit.net import eni from cloudinit import sources @@ -61,7 +62,7 @@ class DataSourceNoCloud(sources.DataSource): # Parse the system serial label from dmi. If not empty, try parsing # like the commandline md = {} - serial = util.read_dmi_data('system-serial-number') + serial = dmi.read_dmi_data('system-serial-number') if serial and load_cmdline_data(md, serial): found.append("dmi") mydata = _merge_new_seed(mydata, {'meta-data': md}) @@ -157,13 +158,14 @@ class DataSourceNoCloud(sources.DataSource): # This could throw errors, but the user told us to do it # so if errors are raised, let them raise - (md_seed, ud) = util.read_seeded(seedfrom, timeout=None) + (md_seed, ud, vd) = util.read_seeded(seedfrom, timeout=None) LOG.debug("Using seeded cache data from %s", seedfrom) # Values in the command line override those from the seed mydata['meta-data'] = util.mergemanydict([mydata['meta-data'], md_seed]) mydata['user-data'] = ud + mydata['vendor-data'] = vd found.append(seedfrom) # Now that we have exhausted any other places merge in the defaults diff --git a/cloudinit/sources/DataSourceNone.py b/cloudinit/sources/DataSourceNone.py index e6250801..b7656ac5 100644 --- a/cloudinit/sources/DataSourceNone.py +++ b/cloudinit/sources/DataSourceNone.py @@ -4,11 +4,8 @@ # # This file is part of cloud-init. See LICENSE file for license information. 
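The util.read_dmi_data to dmi.read_dmi_data migration repeated across the datasources in this commit always follows the same detection idiom. A minimal sketch, assuming a hypothetical ExampleCloud asset tag (each real datasource supplies its own DMI key and expected value):

    from cloudinit import dmi

    EXAMPLE_CHASSIS_ASSET_TAG = 'ExampleCloudVM'  # hypothetical value

    def _is_platform_viable():
        # read_dmi_data reads /sys/class/dmi/id when present, falls back to
        # dmidecode (kenv on FreeBSD), and returns None inside containers,
        # so a non-match and "no DMI available" both fail detection.
        return dmi.read_dmi_data('chassis-asset-tag') == EXAMPLE_CHASSIS_ASSET_TAG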
-from cloudinit import log as logging from cloudinit import sources -LOG = logging.getLogger(__name__) - class DataSourceNone(sources.DataSource): diff --git a/cloudinit/sources/DataSourceOVF.py b/cloudinit/sources/DataSourceOVF.py index e53d2eb1..741c140a 100644 --- a/cloudinit/sources/DataSourceOVF.py +++ b/cloudinit/sources/DataSourceOVF.py @@ -14,6 +14,7 @@ import re import time from xml.dom import minidom +from cloudinit import dmi from cloudinit import log as logging from cloudinit import sources from cloudinit import subp @@ -73,6 +74,7 @@ class DataSourceOVF(sources.DataSource): found = [] md = {} ud = "" + vd = "" vmwareImcConfigFilePath = None nicspath = None @@ -82,7 +84,7 @@ class DataSourceOVF(sources.DataSource): (seedfile, contents) = get_ovf_env(self.paths.seed_dir) - system_type = util.read_dmi_data("system-product-name") + system_type = dmi.read_dmi_data("system-product-name") if system_type is None: LOG.debug("No system-product-name found") @@ -304,7 +306,7 @@ class DataSourceOVF(sources.DataSource): seedfrom, self) return False - (md_seed, ud) = util.read_seeded(seedfrom, timeout=None) + (md_seed, ud, vd) = util.read_seeded(seedfrom, timeout=None) LOG.debug("Using seeded cache data from %s", seedfrom) md = util.mergemanydict([md, md_seed]) @@ -316,11 +318,12 @@ class DataSourceOVF(sources.DataSource): self.seed = ",".join(found) self.metadata = md self.userdata_raw = ud + self.vendordata_raw = vd self.cfg = cfg return True def _get_subplatform(self): - system_type = util.read_dmi_data("system-product-name").lower() + system_type = dmi.read_dmi_data("system-product-name").lower() if system_type == 'vmware': return 'vmware (%s)' % self.seed return 'ovf (%s)' % self.seed diff --git a/cloudinit/sources/DataSourceOpenNebula.py b/cloudinit/sources/DataSourceOpenNebula.py index 45481938..730ec586 100644 --- a/cloudinit/sources/DataSourceOpenNebula.py +++ b/cloudinit/sources/DataSourceOpenNebula.py @@ -350,7 +350,8 @@ def parse_shell_config(content, keylist=None, bash=None, asuser=None, # exclude vars in bash that change on their own or that we used excluded = ( "EPOCHREALTIME", "EPOCHSECONDS", "RANDOM", "LINENO", "SECONDS", "_", - "__v") + "SRANDOM", "__v", + ) preset = {} ret = {} target = None diff --git a/cloudinit/sources/DataSourceOpenStack.py b/cloudinit/sources/DataSourceOpenStack.py index d4b43f44..b3406c67 100644 --- a/cloudinit/sources/DataSourceOpenStack.py +++ b/cloudinit/sources/DataSourceOpenStack.py @@ -6,6 +6,7 @@ import time +from cloudinit import dmi from cloudinit import log as logging from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError from cloudinit import sources @@ -32,7 +33,8 @@ DMI_ASSET_TAG_OPENTELEKOM = 'OpenTelekomCloud' # See github.com/sapcc/helm-charts/blob/master/openstack/nova/values.yaml # -> compute.defaults.vmware.smbios_asset_tag for this value DMI_ASSET_TAG_SAPCCLOUD = 'SAP CCloud VM' -VALID_DMI_ASSET_TAGS = [DMI_ASSET_TAG_OPENTELEKOM, DMI_ASSET_TAG_SAPCCLOUD] +VALID_DMI_ASSET_TAGS = VALID_DMI_PRODUCT_NAMES +VALID_DMI_ASSET_TAGS += [DMI_ASSET_TAG_OPENTELEKOM, DMI_ASSET_TAG_SAPCCLOUD] class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource): @@ -224,10 +226,10 @@ def detect_openstack(accept_oracle=False): """Return True when a potential OpenStack platform is detected.""" if not util.is_x86(): return True # Non-Intel cpus don't properly report dmi product names - product_name = util.read_dmi_data('system-product-name') + product_name = dmi.read_dmi_data('system-product-name') if product_name in 
VALID_DMI_PRODUCT_NAMES: return True - elif util.read_dmi_data('chassis-asset-tag') in VALID_DMI_ASSET_TAGS: + elif dmi.read_dmi_data('chassis-asset-tag') in VALID_DMI_ASSET_TAGS: return True elif accept_oracle and oracle._is_platform_viable(): return True diff --git a/cloudinit/sources/DataSourceOracle.py b/cloudinit/sources/DataSourceOracle.py index 20d6487d..bf81b10b 100644 --- a/cloudinit/sources/DataSourceOracle.py +++ b/cloudinit/sources/DataSourceOracle.py @@ -17,6 +17,7 @@ import base64 from collections import namedtuple from contextlib import suppress as noop +from cloudinit import dmi from cloudinit import log as logging from cloudinit import net, sources, util from cloudinit.net import ( @@ -273,12 +274,12 @@ class DataSourceOracle(sources.DataSource): def _read_system_uuid(): - sys_uuid = util.read_dmi_data('system-uuid') + sys_uuid = dmi.read_dmi_data('system-uuid') return None if sys_uuid is None else sys_uuid.lower() def _is_platform_viable(): - asset_tag = util.read_dmi_data('chassis-asset-tag') + asset_tag = dmi.read_dmi_data('chassis-asset-tag') return asset_tag == CHASSIS_ASSET_TAG diff --git a/cloudinit/sources/DataSourceRbxCloud.py b/cloudinit/sources/DataSourceRbxCloud.py index e064c8d6..0b8994bf 100644 --- a/cloudinit/sources/DataSourceRbxCloud.py +++ b/cloudinit/sources/DataSourceRbxCloud.py @@ -71,11 +71,13 @@ def gratuitous_arp(items, distro): def get_md(): - rbx_data = None + """Returns False (not found or error) or a dictionary with metadata.""" devices = set( util.find_devs_with('LABEL=CLOUDMD') + util.find_devs_with('LABEL=cloudmd') ) + if not devices: + return False for device in devices: try: rbx_data = util.mount_cb( @@ -84,17 +86,17 @@ def get_md(): mtype=['vfat', 'fat', 'msdosfs'] ) if rbx_data: - break + return rbx_data except OSError as err: if err.errno != errno.ENOENT: raise except util.MountFailedError: util.logexc(LOG, "Failed to mount %s when looking for user " "data", device) - if not rbx_data: - util.logexc(LOG, "Failed to load metadata and userdata") - return False - return rbx_data + + LOG.debug("Did not find RbxCloud data, searched devices: %s", + ",".join(devices)) + return False def generate_network_config(netadps): @@ -223,6 +225,8 @@ class DataSourceRbxCloud(sources.DataSource): is used to perform instance configuration. """ rbx_data = get_md() + if rbx_data is False: + return False self.userdata_raw = rbx_data['userdata'] self.metadata = rbx_data['metadata'] self.gratuitous_arp = rbx_data['gratuitous_arp'] diff --git a/cloudinit/sources/DataSourceScaleway.py b/cloudinit/sources/DataSourceScaleway.py index 83c2bf65..41be7665 100644 --- a/cloudinit/sources/DataSourceScaleway.py +++ b/cloudinit/sources/DataSourceScaleway.py @@ -25,6 +25,7 @@ import requests from requests.packages.urllib3.connection import HTTPConnection from requests.packages.urllib3.poolmanager import PoolManager +from cloudinit import dmi from cloudinit import log as logging from cloudinit import sources from cloudinit import url_helper @@ -56,7 +57,7 @@ def on_scaleway(): * the initrd created the file /var/run/scaleway. * "scaleway" is in the kernel cmdline. 
""" - vendor_name = util.read_dmi_data('system-manufacturer') + vendor_name = dmi.read_dmi_data('system-manufacturer') if vendor_name == 'Scaleway': return True diff --git a/cloudinit/sources/DataSourceSmartOS.py b/cloudinit/sources/DataSourceSmartOS.py index f1f903bc..fd292baa 100644 --- a/cloudinit/sources/DataSourceSmartOS.py +++ b/cloudinit/sources/DataSourceSmartOS.py @@ -30,6 +30,7 @@ import random import re import socket +from cloudinit import dmi from cloudinit import log as logging from cloudinit import serial from cloudinit import sources @@ -767,7 +768,7 @@ def get_smartos_environ(uname_version=None, product_name=None): return SMARTOS_ENV_LX_BRAND if product_name is None: - system_type = util.read_dmi_data("system-product-name") + system_type = dmi.read_dmi_data("system-product-name") else: system_type = product_name diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index c4d60fff..9dccc687 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -14,6 +14,7 @@ import json import os from collections import namedtuple +from cloudinit import dmi from cloudinit import importer from cloudinit import log as logging from cloudinit import net @@ -809,7 +810,7 @@ def instance_id_matches_system_uuid(instance_id, field='system-uuid'): if not instance_id: return False - dmi_value = util.read_dmi_data(field) + dmi_value = dmi.read_dmi_data(field) if not dmi_value: return False return instance_id.lower() == dmi_value.lower() diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index b968a96f..d3055d08 100755 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -9,6 +9,7 @@ import struct import time import textwrap import zlib +from errno import ENOENT from cloudinit.settings import CFG_BUILTIN from cloudinit.net import dhcp @@ -16,6 +17,7 @@ from cloudinit import stages from cloudinit import temp_utils from contextlib import contextmanager from xml.etree import ElementTree +from xml.sax.saxutils import escape from cloudinit import subp from cloudinit import url_helper @@ -41,13 +43,19 @@ COMPRESSED_EVENT_TYPE = 'compressed' # cloud-init.log files where the P95 of the file sizes was 537KB and the time # consumed to dump 500KB file was (P95:76, P99:233, P99.9:1170) in ms MAX_LOG_TO_KVP_LENGTH = 512000 -# Marker file to indicate whether cloud-init.log is pushed to KVP -LOG_PUSHED_TO_KVP_MARKER_FILE = '/var/lib/cloud/data/log_pushed_to_kvp' +# File to store the last byte of cloud-init.log that was pushed to KVP. This +# file will be deleted with every VM reboot. +LOG_PUSHED_TO_KVP_INDEX_FILE = '/run/cloud-init/log_pushed_to_kvp_index' azure_ds_reporter = events.ReportEventStack( name="azure-ds", description="initialize reporter for azure ds", reporting_enabled=True) +DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE = ( + 'The VM encountered an error during deployment. 
'
+    'Please visit https://aka.ms/linuxprovisioningerror '
+    'for more information on remediation.')
+

 def azure_ds_telemetry_reporter(func):
     def impl(*args, **kwargs):
@@ -180,12 +188,15 @@ def get_system_info():
     return evt


-def report_diagnostic_event(str):
+def report_diagnostic_event(
+        msg: str, *, logger_func=None) -> events.ReportingEvent:
     """Report a diagnostic event"""
+    if callable(logger_func):
+        logger_func(msg)
     evt = events.ReportingEvent(
         DIAGNOSTIC_EVENT_TYPE, 'diagnostic message',
-        str, events.DEFAULT_EVENT_ORIGIN)
-    events.report_event(evt)
+        msg, events.DEFAULT_EVENT_ORIGIN)
+    events.report_event(evt, excluded_handler_types={"log"})
     # return the event for unit testing purpose
     return evt

@@ -211,27 +222,58 @@ def report_compressed_event(event_name, event_content):
 def push_log_to_kvp(file_name=CFG_BUILTIN['def_log_file']):
     """Push a portion of cloud-init.log file or the whole file to KVP
     based on the file size.
-    If called more than once, it skips pushing the log file to KVP again."""
+    The first time this function is called after VM boot, it will push the
+    last n bytes of the log file such that n < MAX_LOG_TO_KVP_LENGTH.
+    If called again on the same boot, it continues from where it left off.
+    In addition to cloud-init.log, dmesg log will also be collected."""

-    log_pushed_to_kvp = bool(os.path.isfile(LOG_PUSHED_TO_KVP_MARKER_FILE))
-    if log_pushed_to_kvp:
-        report_diagnostic_event("cloud-init.log is already pushed to KVP")
-        return
+    start_index = get_last_log_byte_pushed_to_kvp_index()

     LOG.debug("Dumping cloud-init.log file to KVP")
     try:
         with open(file_name, "rb") as f:
             f.seek(0, os.SEEK_END)
-            seek_index = max(f.tell() - MAX_LOG_TO_KVP_LENGTH, 0)
+            seek_index = max(f.tell() - MAX_LOG_TO_KVP_LENGTH, start_index)
             report_diagnostic_event(
-                "Dumping last {} bytes of cloud-init.log file to KVP".format(
-                    f.tell() - seek_index))
+                "Dumping last {0} bytes of cloud-init.log file to KVP starting"
+                " from index: {1}".format(f.tell() - seek_index, seek_index),
+                logger_func=LOG.debug)
             f.seek(seek_index, os.SEEK_SET)
             report_compressed_event("cloud-init.log", f.read())
-        util.write_file(LOG_PUSHED_TO_KVP_MARKER_FILE, '')
+        util.write_file(LOG_PUSHED_TO_KVP_INDEX_FILE, str(f.tell()))
+    except Exception as ex:
+        report_diagnostic_event(
+            "Exception when dumping log file: %s" % repr(ex),
+            logger_func=LOG.warning)
+
+    LOG.debug("Dumping dmesg log to KVP")
+    try:
+        out, _ = subp.subp(['dmesg'], decode=False, capture=True)
+        report_compressed_event("dmesg", out)
     except Exception as ex:
-        report_diagnostic_event("Exception when dumping log file: %s" %
-                                repr(ex))
+        report_diagnostic_event(
+            "Exception when dumping dmesg log: %s" % repr(ex),
+            logger_func=LOG.warning)
+
+
+@azure_ds_telemetry_reporter
+def get_last_log_byte_pushed_to_kvp_index():
+    try:
+        with open(LOG_PUSHED_TO_KVP_INDEX_FILE, "r") as f:
+            return int(f.read())
+    except IOError as e:
+        if e.errno != ENOENT:
+            report_diagnostic_event("Reading LOG_PUSHED_TO_KVP_INDEX_FILE"
+                                    " failed: %s." % repr(e),
+                                    logger_func=LOG.warning)
+    except ValueError as e:
+        report_diagnostic_event("Invalid value in LOG_PUSHED_TO_KVP_INDEX_FILE"
+                                ": %s." % repr(e),
+                                logger_func=LOG.warning)
+    except Exception as e:
+        report_diagnostic_event("Failed to get the last log byte pushed to KVP"
+                                ": %s."
% repr(e), logger_func=LOG.warning) + return 0 @contextmanager @@ -252,6 +294,54 @@ def _get_dhcp_endpoint_option_name(): return azure_endpoint +@azure_ds_telemetry_reporter +def http_with_retries(url, **kwargs) -> str: + """Wrapper around url_helper.readurl() with custom telemetry logging + that url_helper.readurl() does not provide. + """ + exc = None + + max_readurl_attempts = 240 + default_readurl_timeout = 5 + periodic_logging_attempts = 12 + + if 'timeout' not in kwargs: + kwargs['timeout'] = default_readurl_timeout + + # remove kwargs that cause url_helper.readurl to retry, + # since we are already implementing our own retry logic. + if kwargs.pop('retries', None): + LOG.warning( + 'Ignoring retries kwarg passed in for ' + 'communication with Azure endpoint.') + if kwargs.pop('infinite', None): + LOG.warning( + 'Ignoring infinite kwarg passed in for communication ' + 'with Azure endpoint.') + + for attempt in range(1, max_readurl_attempts + 1): + try: + ret = url_helper.readurl(url, **kwargs) + + report_diagnostic_event( + 'Successful HTTP request with Azure endpoint %s after ' + '%d attempts' % (url, attempt), + logger_func=LOG.debug) + + return ret + + except Exception as e: + exc = e + if attempt % periodic_logging_attempts == 0: + report_diagnostic_event( + 'Failed HTTP request with Azure endpoint %s during ' + 'attempt %d with exception: %s' % + (url, attempt, e), + logger_func=LOG.debug) + + raise exc + + class AzureEndpointHttpClient: headers = { @@ -270,16 +360,15 @@ class AzureEndpointHttpClient: if secure: headers = self.headers.copy() headers.update(self.extra_secure_headers) - return url_helper.readurl(url, headers=headers, - timeout=5, retries=10, sec_between=5) + return http_with_retries(url, headers=headers) def post(self, url, data=None, extra_headers=None): headers = self.headers if extra_headers is not None: headers = self.headers.copy() headers.update(extra_headers) - return url_helper.readurl(url, data=data, headers=headers, - timeout=5, retries=10, sec_between=5) + return http_with_retries( + url, data=data, headers=headers) class InvalidGoalStateXMLException(Exception): @@ -288,11 +377,16 @@ class InvalidGoalStateXMLException(Exception): class GoalState: - def __init__(self, unparsed_xml, azure_endpoint_client): + def __init__( + self, + unparsed_xml: str, + azure_endpoint_client: AzureEndpointHttpClient, + need_certificate: bool = True) -> None: """Parses a GoalState XML string and returns a GoalState object. @param unparsed_xml: string representing a GoalState XML. - @param azure_endpoint_client: instance of AzureEndpointHttpClient + @param azure_endpoint_client: instance of AzureEndpointHttpClient. + @param need_certificate: switch to know if certificates is needed. @return: GoalState object representing the GoalState XML string. 
""" self.azure_endpoint_client = azure_endpoint_client @@ -300,9 +394,9 @@ class GoalState: try: self.root = ElementTree.fromstring(unparsed_xml) except ElementTree.ParseError as e: - msg = 'Failed to parse GoalState XML: %s' - LOG.warning(msg, e) - report_diagnostic_event(msg % (e,)) + report_diagnostic_event( + 'Failed to parse GoalState XML: %s' % e, + logger_func=LOG.warning) raise self.container_id = self._text_from_xpath('./Container/ContainerId') @@ -312,16 +406,15 @@ class GoalState: for attr in ("container_id", "instance_id", "incarnation"): if getattr(self, attr) is None: - msg = 'Missing %s in GoalState XML' - LOG.warning(msg, attr) - report_diagnostic_event(msg % (attr,)) + msg = 'Missing %s in GoalState XML' % attr + report_diagnostic_event(msg, logger_func=LOG.warning) raise InvalidGoalStateXMLException(msg) self.certificates_xml = None url = self._text_from_xpath( './Container/RoleInstanceList/RoleInstance' '/Configuration/Certificates') - if url is not None: + if url is not None and need_certificate: with events.ReportEventStack( name="get-certificates-xml", description="get certificates xml", @@ -349,12 +442,20 @@ class OpenSSLManager: def __init__(self): self.tmpdir = temp_utils.mkdtemp() - self.certificate = None + self._certificate = None self.generate_certificate() def clean_up(self): util.del_dir(self.tmpdir) + @property + def certificate(self): + return self._certificate + + @certificate.setter + def certificate(self, value): + self._certificate = value + @azure_ds_telemetry_reporter def generate_certificate(self): LOG.debug('Generating certificate for communication with fabric...') @@ -477,8 +578,15 @@ class GoalStateHealthReporter: ''') PROVISIONING_SUCCESS_STATUS = 'Ready' + PROVISIONING_NOT_READY_STATUS = 'NotReady' + PROVISIONING_FAILURE_SUBSTATUS = 'ProvisioningFailed' + + HEALTH_REPORT_DESCRIPTION_TRIM_LEN = 512 - def __init__(self, goal_state, azure_endpoint_client, endpoint): + def __init__( + self, goal_state: GoalState, + azure_endpoint_client: AzureEndpointHttpClient, + endpoint: str) -> None: """Creates instance that will report provisioning status to an endpoint @param goal_state: An instance of class GoalState that contains @@ -495,7 +603,7 @@ class GoalStateHealthReporter: self._endpoint = endpoint @azure_ds_telemetry_reporter - def send_ready_signal(self): + def send_ready_signal(self) -> None: document = self.build_report( incarnation=self._goal_state.incarnation, container_id=self._goal_state.container_id, @@ -505,32 +613,52 @@ class GoalStateHealthReporter: try: self._post_health_report(document=document) except Exception as e: - msg = "exception while reporting ready: %s" % e - LOG.error(msg) - report_diagnostic_event(msg) + report_diagnostic_event( + "exception while reporting ready: %s" % e, + logger_func=LOG.error) raise LOG.info('Reported ready to Azure fabric.') + @azure_ds_telemetry_reporter + def send_failure_signal(self, description: str) -> None: + document = self.build_report( + incarnation=self._goal_state.incarnation, + container_id=self._goal_state.container_id, + instance_id=self._goal_state.instance_id, + status=self.PROVISIONING_NOT_READY_STATUS, + substatus=self.PROVISIONING_FAILURE_SUBSTATUS, + description=description) + try: + self._post_health_report(document=document) + except Exception as e: + msg = "exception while reporting failure: %s" % e + report_diagnostic_event(msg, logger_func=LOG.error) + raise + + LOG.warning('Reported failure to Azure fabric.') + def build_report( - self, incarnation, container_id, instance_id, - 
status, substatus=None, description=None): + self, incarnation: str, container_id: str, instance_id: str, + status: str, substatus=None, description=None) -> str: health_detail = '' if substatus is not None: health_detail = self.HEALTH_DETAIL_SUBSECTION_XML_TEMPLATE.format( - health_substatus=substatus, health_description=description) + health_substatus=escape(substatus), + health_description=escape( + description[:self.HEALTH_REPORT_DESCRIPTION_TRIM_LEN])) health_report = self.HEALTH_REPORT_XML_TEMPLATE.format( - incarnation=incarnation, - container_id=container_id, - instance_id=instance_id, - health_status=status, + incarnation=escape(str(incarnation)), + container_id=escape(container_id), + instance_id=escape(instance_id), + health_status=escape(status), health_detail_subsection=health_detail) return health_report @azure_ds_telemetry_reporter - def _post_health_report(self, document): + def _post_health_report(self, document: str) -> None: push_log_to_kvp() # Whenever report_diagnostic_event(diagnostic_msg) is invoked in code, @@ -690,43 +818,52 @@ class WALinuxAgentShim: value = dhcp245 LOG.debug("Using Azure Endpoint from dhcp options") if value is None: - report_diagnostic_event("No Azure endpoint from dhcp options") - LOG.debug('Finding Azure endpoint from networkd...') + report_diagnostic_event( + 'No Azure endpoint from dhcp options. ' + 'Finding Azure endpoint from networkd...', + logger_func=LOG.debug) value = WALinuxAgentShim._networkd_get_value_from_leases() if value is None: # Option-245 stored in /run/cloud-init/dhclient.hooks/<ifc>.json # a dhclient exit hook that calls cloud-init-dhclient-hook - report_diagnostic_event("No Azure endpoint from networkd") - LOG.debug('Finding Azure endpoint from hook json...') + report_diagnostic_event( + 'No Azure endpoint from networkd. ' + 'Finding Azure endpoint from hook json...', + logger_func=LOG.debug) dhcp_options = WALinuxAgentShim._load_dhclient_json() value = WALinuxAgentShim._get_value_from_dhcpoptions(dhcp_options) if value is None: # Fallback and check the leases file if unsuccessful - report_diagnostic_event("No Azure endpoint from dhclient logs") - LOG.debug("Unable to find endpoint in dhclient logs. " - " Falling back to check lease files") + report_diagnostic_event( + 'No Azure endpoint from dhclient logs. ' + 'Unable to find endpoint in dhclient logs. 
' + 'Falling back to check lease files', + logger_func=LOG.debug) if fallback_lease_file is None: - LOG.warning("No fallback lease file was specified.") + report_diagnostic_event( + 'No fallback lease file was specified.', + logger_func=LOG.warning) value = None else: - LOG.debug("Looking for endpoint in lease file %s", - fallback_lease_file) + report_diagnostic_event( + 'Looking for endpoint in lease file %s' + % fallback_lease_file, logger_func=LOG.debug) value = WALinuxAgentShim._get_value_from_leases_file( fallback_lease_file) if value is None: - msg = "No lease found; using default endpoint" - report_diagnostic_event(msg) - LOG.warning(msg) value = DEFAULT_WIRESERVER_ENDPOINT + report_diagnostic_event( + 'No lease found; using default endpoint: %s' % value, + logger_func=LOG.warning) endpoint_ip_address = WALinuxAgentShim.get_ip_from_lease_value(value) - msg = 'Azure endpoint found at %s' % endpoint_ip_address - report_diagnostic_event(msg) - LOG.debug(msg) + report_diagnostic_event( + 'Azure endpoint found at %s' % endpoint_ip_address, + logger_func=LOG.debug) return endpoint_ip_address @azure_ds_telemetry_reporter - def register_with_azure_and_fetch_data(self, pubkey_info=None): + def register_with_azure_and_fetch_data(self, pubkey_info=None) -> dict: """Gets the VM's GoalState from Azure, uses the GoalState information to report ready/send the ready signal/provisioning complete signal to Azure, and then uses pubkey_info to filter and obtain the user's @@ -737,30 +874,56 @@ class WALinuxAgentShim: GoalState. @return: The list of user's authorized pubkey values. """ - if self.openssl_manager is None: + http_client_certificate = None + if self.openssl_manager is None and pubkey_info is not None: self.openssl_manager = OpenSSLManager() + http_client_certificate = self.openssl_manager.certificate if self.azure_endpoint_client is None: self.azure_endpoint_client = AzureEndpointHttpClient( - self.openssl_manager.certificate) - goal_state = self._fetch_goal_state_from_azure() - ssh_keys = self._get_user_pubkeys(goal_state, pubkey_info) + http_client_certificate) + goal_state = self._fetch_goal_state_from_azure( + need_certificate=http_client_certificate is not None + ) + ssh_keys = None + if pubkey_info is not None: + ssh_keys = self._get_user_pubkeys(goal_state, pubkey_info) health_reporter = GoalStateHealthReporter( goal_state, self.azure_endpoint_client, self.endpoint) health_reporter.send_ready_signal() return {'public-keys': ssh_keys} @azure_ds_telemetry_reporter - def _fetch_goal_state_from_azure(self): + def register_with_azure_and_report_failure(self, description: str) -> None: + """Gets the VM's GoalState from Azure, uses the GoalState information + to report failure/send provisioning failure signal to Azure. + + @param: user visible error description of provisioning failure. + """ + if self.azure_endpoint_client is None: + self.azure_endpoint_client = AzureEndpointHttpClient(None) + goal_state = self._fetch_goal_state_from_azure(need_certificate=False) + health_reporter = GoalStateHealthReporter( + goal_state, self.azure_endpoint_client, self.endpoint) + health_reporter.send_failure_signal(description=description) + + @azure_ds_telemetry_reporter + def _fetch_goal_state_from_azure( + self, + need_certificate: bool) -> GoalState: """Fetches the GoalState XML from the Azure endpoint, parses the XML, and returns a GoalState object. + @param need_certificate: switch to know if certificates is needed. 
@return: GoalState object representing the GoalState XML """ unparsed_goal_state_xml = self._get_raw_goal_state_xml_from_azure() - return self._parse_raw_goal_state_xml(unparsed_goal_state_xml) + return self._parse_raw_goal_state_xml( + unparsed_goal_state_xml, + need_certificate + ) @azure_ds_telemetry_reporter - def _get_raw_goal_state_xml_from_azure(self): + def _get_raw_goal_state_xml_from_azure(self) -> str: """Fetches the GoalState XML from the Azure endpoint and returns the XML as a string. @@ -770,40 +933,51 @@ class WALinuxAgentShim: LOG.info('Registering with Azure...') url = 'http://{}/machine/?comp=goalstate'.format(self.endpoint) try: - response = self.azure_endpoint_client.get(url) + with events.ReportEventStack( + name="goalstate-retrieval", + description="retrieve goalstate", + parent=azure_ds_reporter): + response = self.azure_endpoint_client.get(url) except Exception as e: - msg = 'failed to register with Azure: %s' % e - LOG.warning(msg) - report_diagnostic_event(msg) + report_diagnostic_event( + 'failed to register with Azure and fetch GoalState XML: %s' + % e, logger_func=LOG.warning) raise LOG.debug('Successfully fetched GoalState XML.') return response.contents @azure_ds_telemetry_reporter - def _parse_raw_goal_state_xml(self, unparsed_goal_state_xml): + def _parse_raw_goal_state_xml( + self, + unparsed_goal_state_xml: str, + need_certificate: bool) -> GoalState: """Parses a GoalState XML string and returns a GoalState object. @param unparsed_goal_state_xml: GoalState XML string + @param need_certificate: switch to know if certificates is needed. @return: GoalState object representing the GoalState XML """ try: goal_state = GoalState( - unparsed_goal_state_xml, self.azure_endpoint_client) + unparsed_goal_state_xml, + self.azure_endpoint_client, + need_certificate + ) except Exception as e: - msg = 'Error processing GoalState XML: %s' % e - LOG.warning(msg) - report_diagnostic_event(msg) + report_diagnostic_event( + 'Error processing GoalState XML: %s' % e, + logger_func=LOG.warning) raise msg = ', '.join([ 'GoalState XML container id: %s' % goal_state.container_id, 'GoalState XML instance id: %s' % goal_state.instance_id, 'GoalState XML incarnation: %s' % goal_state.incarnation]) - LOG.debug(msg) - report_diagnostic_event(msg) + report_diagnostic_event(msg, logger_func=LOG.debug) return goal_state @azure_ds_telemetry_reporter - def _get_user_pubkeys(self, goal_state, pubkey_info): + def _get_user_pubkeys( + self, goal_state: GoalState, pubkey_info: list) -> list: """Gets and filters the VM admin user's authorized pubkeys. The admin user in this case is the username specified as "admin" @@ -838,7 +1012,7 @@ class WALinuxAgentShim: return ssh_keys @staticmethod - def _filter_pubkeys(keys_by_fingerprint, pubkey_info): + def _filter_pubkeys(keys_by_fingerprint: dict, pubkey_info: list) -> list: """ Filter and return only the user's actual pubkeys. 
@param keys_by_fingerprint: pubkey fingerprint -> pubkey value dict @@ -879,9 +1053,25 @@ def get_metadata_from_fabric(fallback_lease_file=None, dhcp_opts=None, shim.clean_up() +@azure_ds_telemetry_reporter +def report_failure_to_fabric(fallback_lease_file=None, dhcp_opts=None, + description=None): + shim = WALinuxAgentShim(fallback_lease_file=fallback_lease_file, + dhcp_options=dhcp_opts) + if not description: + description = DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE + try: + shim.register_with_azure_and_report_failure( + description=description) + finally: + shim.clean_up() + + def dhcp_log_cb(out, err): - report_diagnostic_event("dhclient output stream: %s" % out) - report_diagnostic_event("dhclient error stream: %s" % err) + report_diagnostic_event( + "dhclient output stream: %s" % out, logger_func=LOG.debug) + report_diagnostic_event( + "dhclient error stream: %s" % err, logger_func=LOG.debug) class EphemeralDHCPv4WithReporting: diff --git a/cloudinit/sources/helpers/digitalocean.py b/cloudinit/sources/helpers/digitalocean.py index b545c4d6..f9be4ecb 100644 --- a/cloudinit/sources/helpers/digitalocean.py +++ b/cloudinit/sources/helpers/digitalocean.py @@ -5,6 +5,7 @@ import json import random +from cloudinit import dmi from cloudinit import log as logging from cloudinit import net as cloudnet from cloudinit import url_helper @@ -195,11 +196,11 @@ def read_sysinfo(): # SMBIOS information # Detect if we are on DigitalOcean and return the Droplet's ID - vendor_name = util.read_dmi_data("system-manufacturer") + vendor_name = dmi.read_dmi_data("system-manufacturer") if vendor_name != "DigitalOcean": return (False, None) - droplet_id = util.read_dmi_data("system-serial-number") + droplet_id = dmi.read_dmi_data("system-serial-number") if droplet_id: LOG.debug("system identified via SMBIOS as DigitalOcean Droplet: %s", droplet_id) diff --git a/cloudinit/sources/helpers/hetzner.py b/cloudinit/sources/helpers/hetzner.py index 72edb023..33dc4c53 100644 --- a/cloudinit/sources/helpers/hetzner.py +++ b/cloudinit/sources/helpers/hetzner.py @@ -3,15 +3,12 @@ # # This file is part of cloud-init. See LICENSE file for license information. -from cloudinit import log as logging from cloudinit import url_helper from cloudinit import util import base64 import binascii -LOG = logging.getLogger(__name__) - def read_metadata(url, timeout=2, sec_between=2, retries=30): response = url_helper.readurl(url, timeout=timeout, diff --git a/cloudinit/sources/helpers/netlink.py b/cloudinit/sources/helpers/netlink.py index c2ad587b..e13d6834 100644 --- a/cloudinit/sources/helpers/netlink.py +++ b/cloudinit/sources/helpers/netlink.py @@ -185,6 +185,54 @@ def read_rta_oper_state(data): return InterfaceOperstate(ifname, operstate) +def wait_for_nic_attach_event(netlink_socket, existing_nics): + '''Block until a single nic is attached. + + :param: netlink_socket: netlink_socket to receive events + :param: existing_nics: List of existing nics so that we can skip them. + :raises: AssertionError if netlink_socket is none. + ''' + LOG.debug("Preparing to wait for nic attach.") + ifname = None + + def should_continue_cb(iname, carrier, prevCarrier): + if iname in existing_nics: + return True + nonlocal ifname + ifname = iname + return False + + # We can return even if the operational state of the new nic is DOWN + # because we set it to UP before doing dhcp. 
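The nic-attach helper being added here (like the detach and media-switch helpers that follow) reduces to one pattern: hand read_netlink_messages, called just below, a predicate; capture the matching interface name through a nonlocal; and return it once the predicate signals stop. A minimal consumer sketch, assuming only this module's public helpers (create_bound_netlink_socket is real, per the test imports further down; the socket lifecycle handling here is illustrative):

    from cloudinit.sources.helpers import netlink

    def wait_for_hot_added_nic(known_nics):
        # Block until a link event arrives for a NIC not in known_nics,
        # then return that interface's name (e.g. 'eth1').
        sock = netlink.create_bound_netlink_socket()
        try:
            return netlink.wait_for_nic_attach_event(sock, known_nics)
        finally:
            sock.close()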
+ read_netlink_messages(netlink_socket, + None, + [RTM_NEWLINK], + [OPER_UP, OPER_DOWN], + should_continue_cb) + return ifname + + +def wait_for_nic_detach_event(netlink_socket): + '''Block until a single nic is detached and its operational state is down. + + :param: netlink_socket: netlink_socket to receive events. + ''' + LOG.debug("Preparing to wait for nic detach.") + ifname = None + + def should_continue_cb(iname, carrier, prevCarrier): + nonlocal ifname + ifname = iname + return False + + read_netlink_messages(netlink_socket, + None, + [RTM_DELLINK], + [OPER_DOWN], + should_continue_cb) + return ifname + + def wait_for_media_disconnect_connect(netlink_socket, ifname): '''Block until media disconnect and connect has happened on an interface. Listens on netlink socket to receive netlink events and when the carrier @@ -198,10 +246,42 @@ def wait_for_media_disconnect_connect(netlink_socket, ifname): assert (netlink_socket is not None), ("netlink socket is none") assert (ifname is not None), ("interface name is none") assert (len(ifname) > 0), ("interface name cannot be empty") + + def should_continue_cb(iname, carrier, prevCarrier): + # check for carrier down, up sequence + isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP) + if isVnetSwitch: + LOG.debug("Media switch happened on %s.", ifname) + return False + return True + + LOG.debug("Wait for media disconnect and reconnect to happen") + read_netlink_messages(netlink_socket, + ifname, + [RTM_NEWLINK, RTM_DELLINK], + [OPER_UP, OPER_DOWN], + should_continue_cb) + + +def read_netlink_messages(netlink_socket, + ifname_filter, + rtm_types, + operstates, + should_continue_callback): + ''' Reads from the netlink socket until the condition specified by + the continuation callback is met. + + :param: netlink_socket: netlink_socket to receive events. + :param: ifname_filter: if not None, will only listen for this interface. + :param: rtm_types: Type of netlink events to listen for. + :param: operstates: Operational states to listen. + :param: should_continue_callback: Specifies when to stop listening. + ''' + if netlink_socket is None: + raise RuntimeError("Netlink socket is none") + data = bytes() carrier = OPER_UP prevCarrier = OPER_UP - data = bytes() - LOG.debug("Wait for media disconnect and reconnect to happen") while True: recv_data = read_netlink_socket(netlink_socket, SELECT_TIMEOUT) if recv_data is None: @@ -223,26 +303,26 @@ def wait_for_media_disconnect_connect(netlink_socket, ifname): padlen = (nlheader.length+PAD_ALIGNMENT-1) & ~(PAD_ALIGNMENT-1) offset = offset + padlen LOG.debug('offset to next netlink message: %d', offset) - # Ignore any messages not new link or del link - if nlheader.type not in [RTM_NEWLINK, RTM_DELLINK]: + # Continue if we are not interested in this message. + if nlheader.type not in rtm_types: continue interface_state = read_rta_oper_state(nl_msg) if interface_state is None: LOG.debug('Failed to read rta attributes: %s', interface_state) continue - if interface_state.ifname != ifname: + if (ifname_filter is not None and + interface_state.ifname != ifname_filter): LOG.debug( "Ignored netlink event on interface %s. 
Waiting for %s.", - interface_state.ifname, ifname) + interface_state.ifname, ifname_filter) continue - if interface_state.operstate not in [OPER_UP, OPER_DOWN]: + if interface_state.operstate not in operstates: continue prevCarrier = carrier carrier = interface_state.operstate - # check for carrier down, up sequence - isVnetSwitch = (prevCarrier == OPER_DOWN) and (carrier == OPER_UP) - if isVnetSwitch: - LOG.debug("Media switch happened on %s.", ifname) + if not should_continue_callback(interface_state.ifname, + carrier, + prevCarrier): return data = data[offset:] diff --git a/cloudinit/sources/helpers/openstack.py b/cloudinit/sources/helpers/openstack.py index 65e020c5..3e6365f1 100644 --- a/cloudinit/sources/helpers/openstack.py +++ b/cloudinit/sources/helpers/openstack.py @@ -602,11 +602,17 @@ def convert_net_json(network_json=None, known_macs=None): elif network['type'] in ['ipv6_slaac', 'ipv6_dhcpv6-stateless', 'ipv6_dhcpv6-stateful']: subnet.update({'type': network['type']}) - elif network['type'] in ['ipv4', 'ipv6']: + elif network['type'] in ['ipv4', 'static']: subnet.update({ 'type': 'static', 'address': network.get('ip_address'), }) + elif network['type'] in ['ipv6', 'static6']: + cfg.update({'accept-ra': False}) + subnet.update({ + 'type': 'static6', + 'address': network.get('ip_address'), + }) # Enable accept_ra for stateful and legacy ipv6_dhcp types if network['type'] in ['ipv6_dhcpv6-stateful', 'ipv6_dhcp']: diff --git a/cloudinit/sources/helpers/tests/test_netlink.py b/cloudinit/sources/helpers/tests/test_netlink.py index 10760bd6..cafe3961 100644 --- a/cloudinit/sources/helpers/tests/test_netlink.py +++ b/cloudinit/sources/helpers/tests/test_netlink.py @@ -9,9 +9,10 @@ import codecs from cloudinit.sources.helpers.netlink import ( NetlinkCreateSocketError, create_bound_netlink_socket, read_netlink_socket, read_rta_oper_state, unpack_rta_attr, wait_for_media_disconnect_connect, + wait_for_nic_attach_event, wait_for_nic_detach_event, OPER_DOWN, OPER_UP, OPER_DORMANT, OPER_LOWERLAYERDOWN, OPER_NOTPRESENT, - OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_SETLINK, - RTM_GETLINK, MAX_SIZE) + OPER_TESTING, OPER_UNKNOWN, RTATTR_START_OFFSET, RTM_NEWLINK, RTM_DELLINK, + RTM_SETLINK, RTM_GETLINK, MAX_SIZE) def int_to_bytes(i): @@ -135,6 +136,75 @@ class TestParseNetlinkMessage(CiTestCase): @mock.patch('cloudinit.sources.helpers.netlink.socket.socket') @mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket') +class TestNicAttachDetach(CiTestCase): + with_logs = True + + def _media_switch_data(self, ifname, msg_type, operstate): + '''construct netlink data with specified fields''' + if ifname and operstate is not None: + data = bytearray(48) + bytes = ifname.encode("utf-8") + struct.pack_into("HH4sHHc", data, RTATTR_START_OFFSET, 8, 3, + bytes, 5, 16, int_to_bytes(operstate)) + elif ifname: + data = bytearray(40) + bytes = ifname.encode("utf-8") + struct.pack_into("HH4s", data, RTATTR_START_OFFSET, 8, 3, bytes) + elif operstate: + data = bytearray(40) + struct.pack_into("HHc", data, RTATTR_START_OFFSET, 5, 16, + int_to_bytes(operstate)) + struct.pack_into("=LHHLL", data, 0, len(data), msg_type, 0, 0, 0) + return data + + def test_nic_attached_oper_down(self, m_read_netlink_socket, m_socket): + '''Test for a new nic attached''' + ifname = "eth0" + data_op_down = self._media_switch_data(ifname, RTM_NEWLINK, OPER_DOWN) + m_read_netlink_socket.side_effect = [data_op_down] + ifread = wait_for_nic_attach_event(m_socket, []) + 
self.assertEqual(m_read_netlink_socket.call_count, 1) + self.assertEqual(ifname, ifread) + + def test_nic_attached_oper_up(self, m_read_netlink_socket, m_socket): + '''Test for a new nic attached''' + ifname = "eth0" + data_op_up = self._media_switch_data(ifname, RTM_NEWLINK, OPER_UP) + m_read_netlink_socket.side_effect = [data_op_up] + ifread = wait_for_nic_attach_event(m_socket, []) + self.assertEqual(m_read_netlink_socket.call_count, 1) + self.assertEqual(ifname, ifread) + + def test_nic_attach_ignore_existing(self, m_read_netlink_socket, m_socket): + '''Test that we read only the interfaces we are interested in.''' + data_eth0 = self._media_switch_data("eth0", RTM_NEWLINK, OPER_DOWN) + data_eth1 = self._media_switch_data("eth1", RTM_NEWLINK, OPER_DOWN) + m_read_netlink_socket.side_effect = [data_eth0, data_eth1] + ifread = wait_for_nic_attach_event(m_socket, ["eth0"]) + self.assertEqual(m_read_netlink_socket.call_count, 2) + self.assertEqual("eth1", ifread) + + def test_nic_attach_read_first(self, m_read_netlink_socket, m_socket): + '''Test that we read only the interfaces we are interested in.''' + data_eth0 = self._media_switch_data("eth0", RTM_NEWLINK, OPER_DOWN) + data_eth1 = self._media_switch_data("eth1", RTM_NEWLINK, OPER_DOWN) + m_read_netlink_socket.side_effect = [data_eth0, data_eth1] + ifread = wait_for_nic_attach_event(m_socket, ["eth1"]) + self.assertEqual(m_read_netlink_socket.call_count, 1) + self.assertEqual("eth0", ifread) + + def test_nic_detached(self, m_read_netlink_socket, m_socket): + '''Test for an existing nic detached''' + ifname = "eth0" + data_op_down = self._media_switch_data(ifname, RTM_DELLINK, OPER_DOWN) + m_read_netlink_socket.side_effect = [data_op_down] + ifread = wait_for_nic_detach_event(m_socket) + self.assertEqual(m_read_netlink_socket.call_count, 1) + self.assertEqual(ifname, ifread) + + +@mock.patch('cloudinit.sources.helpers.netlink.socket.socket') +@mock.patch('cloudinit.sources.helpers.netlink.read_netlink_socket') class TestWaitForMediaDisconnectConnect(CiTestCase): with_logs = True diff --git a/cloudinit/sources/helpers/vmware/imc/config_nic.py b/cloudinit/sources/helpers/vmware/imc/config_nic.py index 3745a262..9cd2c0c0 100644 --- a/cloudinit/sources/helpers/vmware/imc/config_nic.py +++ b/cloudinit/sources/helpers/vmware/imc/config_nic.py @@ -275,6 +275,7 @@ class NicConfigurator(object): "# DO NOT EDIT THIS FILE BY HAND --" " AUTOMATICALLY GENERATED BY cloud-init", "source /etc/network/interfaces.d/*.cfg", + "source-directory /etc/network/interfaces.d", ] util.write_file(interfaceFile, content='\n'.join(lines)) diff --git a/cloudinit/sources/tests/test_oracle.py b/cloudinit/sources/tests/test_oracle.py index 7bd23813..a7bbdfd9 100644 --- a/cloudinit/sources/tests/test_oracle.py +++ b/cloudinit/sources/tests/test_oracle.py @@ -153,20 +153,20 @@ class TestDataSourceOracle: class TestIsPlatformViable(test_helpers.CiTestCase): - @mock.patch(DS_PATH + ".util.read_dmi_data", + @mock.patch(DS_PATH + ".dmi.read_dmi_data", return_value=oracle.CHASSIS_ASSET_TAG) def test_expected_viable(self, m_read_dmi_data): """System with known chassis tag is viable.""" self.assertTrue(oracle._is_platform_viable()) m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')]) - @mock.patch(DS_PATH + ".util.read_dmi_data", return_value=None) + @mock.patch(DS_PATH + ".dmi.read_dmi_data", return_value=None) def test_expected_not_viable_dmi_data_none(self, m_read_dmi_data): """System without known chassis tag is not viable.""" 
        self.assertFalse(oracle._is_platform_viable())
        m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')])

-    @mock.patch(DS_PATH + ".util.read_dmi_data", return_value="LetsGoCubs")
+    @mock.patch(DS_PATH + ".dmi.read_dmi_data", return_value="LetsGoCubs")
     def test_expected_not_viable_other(self, m_read_dmi_data):
         """System with unknown chassis tag is not viable."""
         self.assertFalse(oracle._is_platform_viable())
         m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')])
diff --git a/cloudinit/ssh_util.py b/cloudinit/ssh_util.py
index c08042d6..d5113996 100644
--- a/cloudinit/ssh_util.py
+++ b/cloudinit/ssh_util.py
@@ -262,13 +262,13 @@ def extract_authorized_keys(username, sshd_cfg_file=DEF_SSHD_CFG):
     except (IOError, OSError):
         # Give up and use a default key filename
-        auth_key_fns[0] = default_authorizedkeys_file
+        auth_key_fns.append(default_authorizedkeys_file)
         util.logexc(LOG, "Failed extracting 'AuthorizedKeysFile' in SSH "
                     "config from %r, using 'AuthorizedKeysFile' file "
                     "%r instead", DEF_SSHD_CFG, auth_key_fns[0])

-    # always store all the keys in the user's private file
-    return (default_authorizedkeys_file, parse_authorized_keys(auth_key_fns))
+    # always store all the keys in the first file configured on sshd_config
+    return (auth_key_fns[0], parse_authorized_keys(auth_key_fns))


 def setup_user_keys(keys, username, options=None):
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index 765f4aab..0cce6e80 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -148,7 +148,7 @@ class Init(object):
         util.ensure_dirs(self._initial_subdirs())
         log_file = util.get_cfg_option_str(self.cfg, 'def_log_file')
         if log_file:
-            util.ensure_file(log_file)
+            util.ensure_file(log_file, preserve_mode=True)
         perms = self.cfg.get('syslog_fix_perms')
         if not perms:
             perms = {}
diff --git a/cloudinit/subp.py b/cloudinit/subp.py
index 3e4efa42..024e1a98 100644
--- a/cloudinit/subp.py
+++ b/cloudinit/subp.py
@@ -144,7 +144,7 @@ class ProcessExecutionError(IOError):
 def subp(args, data=None, rcs=None, env=None, capture=True,
          combine_capture=False, shell=False,
          logstring=False, decode="replace", target=None, update_env=None,
-         status_cb=None):
+         status_cb=None, cwd=None):
     """Run a subprocess.

     :param args: command to run in a list. [cmd, arg1, arg2...]
@@ -181,6 +181,8 @@ def subp(args, data=None, rcs=None, env=None, capture=True,
     :param status_cb:
         call this function with a single string argument before
         starting and after finishing.
+    :param cwd:
+        change the working directory to cwd before executing the command.
:return if not capturing, return is (None, None) @@ -254,7 +256,7 @@ def subp(args, data=None, rcs=None, env=None, capture=True, try: sp = subprocess.Popen(bytes_args, stdout=stdout, stderr=stderr, stdin=stdin, - env=env, shell=shell) + env=env, shell=shell, cwd=cwd) (out, err) = sp.communicate(data) except OSError as e: if status_cb: diff --git a/cloudinit/tests/test_dmi.py b/cloudinit/tests/test_dmi.py new file mode 100644 index 00000000..78a72122 --- /dev/null +++ b/cloudinit/tests/test_dmi.py @@ -0,0 +1,154 @@ +from cloudinit.tests import helpers +from cloudinit import dmi +from cloudinit import util +from cloudinit import subp + +import os +import tempfile +import shutil +from unittest import mock + + +class TestReadDMIData(helpers.FilesystemMockingTestCase): + + def setUp(self): + super(TestReadDMIData, self).setUp() + self.new_root = tempfile.mkdtemp() + self.addCleanup(shutil.rmtree, self.new_root) + self.reRoot(self.new_root) + p = mock.patch("cloudinit.dmi.is_container", return_value=False) + self.addCleanup(p.stop) + self._m_is_container = p.start() + p = mock.patch("cloudinit.dmi.is_FreeBSD", return_value=False) + self.addCleanup(p.stop) + self._m_is_FreeBSD = p.start() + + def _create_sysfs_parent_directory(self): + util.ensure_dir(os.path.join('sys', 'class', 'dmi', 'id')) + + def _create_sysfs_file(self, key, content): + """Mocks the sys path found on Linux systems.""" + self._create_sysfs_parent_directory() + dmi_key = "/sys/class/dmi/id/{0}".format(key) + util.write_file(dmi_key, content) + + def _configure_dmidecode_return(self, key, content, error=None): + """ + In order to test a missing sys path and call outs to dmidecode, this + function fakes the results of dmidecode to test the results. + """ + def _dmidecode_subp(cmd): + if cmd[-1] != key: + raise subp.ProcessExecutionError() + return (content, error) + + self.patched_funcs.enter_context( + mock.patch("cloudinit.dmi.subp.which", side_effect=lambda _: True)) + self.patched_funcs.enter_context( + mock.patch("cloudinit.dmi.subp.subp", side_effect=_dmidecode_subp)) + + def _configure_kenv_return(self, key, content, error=None): + """ + In order to test a FreeBSD system call outs to kenv, this + function fakes the results of kenv to test the results. 
+ """ + def _kenv_subp(cmd): + if cmd[-1] != dmi.DMIDECODE_TO_KERNEL[key].freebsd: + raise subp.ProcessExecutionError() + return (content, error) + + self.patched_funcs.enter_context( + mock.patch("cloudinit.dmi.subp.subp", side_effect=_kenv_subp)) + + def patch_mapping(self, new_mapping): + self.patched_funcs.enter_context( + mock.patch('cloudinit.dmi.DMIDECODE_TO_KERNEL', + new_mapping)) + + def test_sysfs_used_with_key_in_mapping_and_file_on_disk(self): + self.patch_mapping({'mapped-key': dmi.kdmi('mapped-value', None)}) + expected_dmi_value = 'sys-used-correctly' + self._create_sysfs_file('mapped-value', expected_dmi_value) + self._configure_dmidecode_return('mapped-key', 'wrong-wrong-wrong') + self.assertEqual(expected_dmi_value, dmi.read_dmi_data('mapped-key')) + + def test_dmidecode_used_if_no_sysfs_file_on_disk(self): + self.patch_mapping({}) + self._create_sysfs_parent_directory() + expected_dmi_value = 'dmidecode-used' + self._configure_dmidecode_return('use-dmidecode', expected_dmi_value) + with mock.patch("cloudinit.util.os.uname") as m_uname: + m_uname.return_value = ('x-sysname', 'x-nodename', + 'x-release', 'x-version', 'x86_64') + self.assertEqual(expected_dmi_value, + dmi.read_dmi_data('use-dmidecode')) + + def test_dmidecode_not_used_on_arm(self): + self.patch_mapping({}) + print("current =%s", subp) + self._create_sysfs_parent_directory() + dmi_val = 'from-dmidecode' + dmi_name = 'use-dmidecode' + self._configure_dmidecode_return(dmi_name, dmi_val) + print("now =%s", subp) + + expected = {'armel': None, 'aarch64': dmi_val, 'x86_64': dmi_val} + found = {} + # we do not run the 'dmi-decode' binary on some arches + # verify that anything requested that is not in the sysfs dir + # will return None on those arches. + with mock.patch("cloudinit.util.os.uname") as m_uname: + for arch in expected: + m_uname.return_value = ('x-sysname', 'x-nodename', + 'x-release', 'x-version', arch) + print("now2 =%s", subp) + found[arch] = dmi.read_dmi_data(dmi_name) + self.assertEqual(expected, found) + + def test_none_returned_if_neither_source_has_data(self): + self.patch_mapping({}) + self._configure_dmidecode_return('key', 'value') + self.assertIsNone(dmi.read_dmi_data('expect-fail')) + + def test_none_returned_if_dmidecode_not_in_path(self): + self.patched_funcs.enter_context( + mock.patch.object(subp, 'which', lambda _: False)) + self.patch_mapping({}) + self.assertIsNone(dmi.read_dmi_data('expect-fail')) + + def test_empty_string_returned_instead_of_foxfox(self): + # uninitialized dmi values show as \xff, return empty string + my_len = 32 + dmi_value = b'\xff' * my_len + b'\n' + expected = "" + dmi_key = 'system-product-name' + sysfs_key = 'product_name' + self._create_sysfs_file(sysfs_key, dmi_value) + self.assertEqual(expected, dmi.read_dmi_data(dmi_key)) + + def test_container_returns_none(self): + """In a container read_dmi_data should always return None.""" + + # first verify we get the value if not in container + self._m_is_container.return_value = False + key, val = ("system-product-name", "my_product") + self._create_sysfs_file('product_name', val) + self.assertEqual(val, dmi.read_dmi_data(key)) + + # then verify in container returns None + self._m_is_container.return_value = True + self.assertIsNone(dmi.read_dmi_data(key)) + + def test_container_returns_none_on_unknown(self): + """In a container even bogus keys return None.""" + self._m_is_container.return_value = True + self._create_sysfs_file('product_name', "should-be-ignored") + self.assertIsNone(dmi.read_dmi_data("bogus")) 
+ self.assertIsNone(dmi.read_dmi_data("system-product-name")) + + def test_freebsd_uses_kenv(self): + """On a FreeBSD system, kenv is called.""" + self._m_is_FreeBSD.return_value = True + key, val = ("system-product-name", "my_product") + self._configure_kenv_return(key, val) + self.assertEqual(dmi.read_dmi_data(key), val) diff --git a/cloudinit/tests/test_gpg.py b/cloudinit/tests/test_gpg.py index f96f5372..311dfad6 100644 --- a/cloudinit/tests/test_gpg.py +++ b/cloudinit/tests/test_gpg.py @@ -49,6 +49,7 @@ class TestReceiveKeys(CiTestCase): m_subp.return_value = ('', '') gpg.recv_key(key, keyserver, retries=retries) m_subp.assert_called_once_with( - ['gpg', '--keyserver=%s' % keyserver, '--recv-keys', key], + ['gpg', '--no-tty', + '--keyserver=%s' % keyserver, '--recv-keys', key], capture=True) m_sleep.assert_not_called() diff --git a/cloudinit/tests/test_persistence.py b/cloudinit/tests/test_persistence.py new file mode 100644 index 00000000..ec1152a9 --- /dev/null +++ b/cloudinit/tests/test_persistence.py @@ -0,0 +1,127 @@ +# Copyright (C) 2020 Canonical Ltd. +# +# Author: Daniel Watkins <oddbloke@ubuntu.com> +# +# This file is part of cloud-init. See LICENSE file for license information. +""" +Tests for cloudinit.persistence. + +Per https://docs.python.org/3/library/pickle.html, only "classes that are +defined at the top level of a module" can be pickled. This means that all of +our ``CloudInitPickleMixin`` subclasses for testing must be defined at +module-level (rather than being defined inline or dynamically in the body of +test methods, as we would do without this constraint). + +``TestPickleMixin.test_subclasses`` iterates over a list of all of these +classes, and tests that they round-trip through a pickle dump/load. As the +interface we're testing is that ``_unpickle`` is called appropriately on +subclasses, our subclasses define their assertions in their ``_unpickle`` +implementation. (This means that the assertions will not be executed if +``_unpickle`` is not called at all; we have +``TestPickleMixin.test_unpickle_called`` to ensure it is called.) + +To avoid manually maintaining a list of classes for parametrization we use a +simple metaclass, ``_Collector``, to gather them up. +""" + +import pickle +from unittest import mock + +import pytest + +from cloudinit.persistence import CloudInitPickleMixin + + +class _Collector(type): + """Any class using this as a metaclass will be stored in test_classes.""" + + test_classes = [] + + def __new__(cls, *args): + new_cls = super().__new__(cls, *args) + _Collector.test_classes.append(new_cls) + return new_cls + + +class InstanceVersionNotUsed(CloudInitPickleMixin, metaclass=_Collector): + """Test that the class version is used over one set in instance state.""" + + _ci_pkl_version = 1 + + def __init__(self): + self._ci_pkl_version = 2 + + def _unpickle(self, ci_pkl_version: int) -> None: + assert 1 == ci_pkl_version + + +class MissingVersionHandled(CloudInitPickleMixin, metaclass=_Collector): + """Test that pickles without ``_ci_pkl_version`` are handled gracefully. + + This is tested by overriding ``__getstate__`` so the dumped pickle of this + class will not have ``_ci_pkl_version`` included. 
+ """ + + def __getstate__(self): + return self.__dict__ + + def _unpickle(self, ci_pkl_version: int) -> None: + assert 0 == ci_pkl_version + + +class OverridenVersionHonored(CloudInitPickleMixin, metaclass=_Collector): + """Test that the subclass's version is used.""" + + _ci_pkl_version = 1 + + def _unpickle(self, ci_pkl_version: int) -> None: + assert 1 == ci_pkl_version + + +class StateIsRestored(CloudInitPickleMixin, metaclass=_Collector): + """Instance state should be restored before ``_unpickle`` is called.""" + + def __init__(self): + self.some_state = "some state" + + def _unpickle(self, ci_pkl_version: int) -> None: + assert "some state" == self.some_state + + +class UnpickleCanBeUnoverriden(CloudInitPickleMixin, metaclass=_Collector): + """Subclasses should not need to override ``_unpickle``.""" + + +class VersionDefaultsToZero(CloudInitPickleMixin, metaclass=_Collector): + """Test that the default version is 0.""" + + def _unpickle(self, ci_pkl_version: int) -> None: + assert 0 == ci_pkl_version + + +class VersionIsPoppedFromState(CloudInitPickleMixin, metaclass=_Collector): + """Test _ci_pkl_version is popped from state before being restored.""" + + def _unpickle(self, ci_pkl_version: int) -> None: + # `self._ci_pkl_version` returns the type's _ci_pkl_version if it isn't + # in instance state, so we need to explicitly check self.__dict__. + assert "_ci_pkl_version" not in self.__dict__ + + +class TestPickleMixin: + def test_unpickle_called(self): + """Test that self._unpickle is called on unpickle.""" + with mock.patch.object( + CloudInitPickleMixin, "_unpickle" + ) as m_unpickle: + pickle.loads(pickle.dumps(CloudInitPickleMixin())) + assert 1 == m_unpickle.call_count + + @pytest.mark.parametrize("cls", _Collector.test_classes) + def test_subclasses(self, cls): + """For each collected class, round-trip through pickle dump/load. + + Assertions are implemented in ``cls._unpickle``, and so are evoked as + part of the pickle load. + """ + pickle.loads(pickle.dumps(cls())) diff --git a/cloudinit/tests/test_stages.py b/cloudinit/tests/test_stages.py index d5c9c0e4..d2d1b37f 100644 --- a/cloudinit/tests/test_stages.py +++ b/cloudinit/tests/test_stages.py @@ -3,6 +3,9 @@ """Tests related to cloudinit.stages module.""" import os +import stat + +import pytest from cloudinit import stages from cloudinit import sources @@ -341,4 +344,63 @@ class TestInit(CiTestCase): self.init.distro.apply_network_config.assert_called_with( net_cfg, bring_up=True) + +class TestInit_InitializeFilesystem: + """Tests for cloudinit.stages.Init._initialize_filesystem. + + TODO: Expand these tests to cover all of _initialize_filesystem's behavior. + """ + + @pytest.yield_fixture + def init(self, paths): + """A fixture which yields a stages.Init instance with paths and cfg set + + As it is replaced with a mock, consumers of this fixture can set + `init.cfg` if the default empty dict configuration is not appropriate. 
+ """ + with mock.patch( + "cloudinit.stages.Init.cfg", mock.PropertyMock(return_value={}) + ): + with mock.patch("cloudinit.stages.util.ensure_dirs"): + init = stages.Init() + init._paths = paths + yield init + + @mock.patch("cloudinit.stages.util.ensure_file") + def test_ensure_file_not_called_if_no_log_file_configured( + self, m_ensure_file, init + ): + """If no log file is configured, we should not ensure its existence.""" + init.cfg = {} + + init._initialize_filesystem() + + assert 0 == m_ensure_file.call_count + + def test_log_files_existence_is_ensured_if_configured(self, init, tmpdir): + """If a log file is configured, we should ensure its existence.""" + log_file = tmpdir.join("cloud-init.log") + init.cfg = {"def_log_file": str(log_file)} + + init._initialize_filesystem() + + assert log_file.exists + + def test_existing_file_permissions_are_not_modified(self, init, tmpdir): + """If the log file already exists, we should not modify its permissions + + See https://bugs.launchpad.net/cloud-init/+bug/1900837. + """ + # Use a mode that will never be made the default so this test will + # always be valid + mode = 0o606 + log_file = tmpdir.join("cloud-init.log") + log_file.ensure() + log_file.chmod(mode) + init.cfg = {"def_log_file": str(log_file)} + + init._initialize_filesystem() + + assert mode == stat.S_IMODE(log_file.stat().mode) + # vi: ts=4 expandtab diff --git a/cloudinit/tests/test_upgrade.py b/cloudinit/tests/test_upgrade.py new file mode 100644 index 00000000..f79a2536 --- /dev/null +++ b/cloudinit/tests/test_upgrade.py @@ -0,0 +1,45 @@ +# Copyright (C) 2020 Canonical Ltd. +# +# Author: Daniel Watkins <oddbloke@ubuntu.com> +# +# This file is part of cloud-init. See LICENSE file for license information. + +"""Upgrade testing for cloud-init. + +This module tests cloud-init's behaviour across upgrades. Specifically, it +specifies a set of invariants that the current codebase expects to be true (as +tests in ``TestUpgrade``) and then checks that these hold true after unpickling +``obj.pkl``s from previous versions of cloud-init; those pickles are stored in +``tests/data/old_pickles/``. +""" + +import operator +import pathlib + +import pytest + +from cloudinit.stages import _pkl_load +from cloudinit.tests.helpers import resourceLocation + + +class TestUpgrade: + @pytest.fixture( + params=pathlib.Path(resourceLocation("old_pickles")).glob("*.pkl"), + scope="class", + ids=operator.attrgetter("name"), + ) + def previous_obj_pkl(self, request): + """Load each pickle to memory once, then run all tests against it. + + Test implementations _must not_ modify the ``previous_obj_pkl`` which + they are passed, as that will affect tests that run after them. 
+ """ + return _pkl_load(str(request.param)) + + def test_networking_set_on_distro(self, previous_obj_pkl): + """We always expect to have ``.networking`` on ``Distro`` objects.""" + assert previous_obj_pkl.distro.networking is not None + + def test_blacklist_drivers_set_on_networking(self, previous_obj_pkl): + """We always expect Networking.blacklist_drivers to be initialised.""" + assert previous_obj_pkl.distro.networking.blacklist_drivers is None diff --git a/cloudinit/tests/test_util.py b/cloudinit/tests/test_util.py index 096a3037..b7a302f1 100644 --- a/cloudinit/tests/test_util.py +++ b/cloudinit/tests/test_util.py @@ -730,6 +730,41 @@ class TestMountCb: """already_mounted_device_and_mountdict, but return only the device""" return already_mounted_device_and_mountdict[0] + @pytest.mark.parametrize( + "mtype,expected", + [ + # While the filesystem is called iso9660, the mount type is cd9660 + ("iso9660", "cd9660"), + # vfat is generally called "msdos" on BSD + ("vfat", "msdos"), + # judging from man pages, only FreeBSD has this alias + ("msdosfs", "msdos"), + # Test happy path + ("ufs", "ufs") + ], + ) + @mock.patch("cloudinit.util.is_Linux", autospec=True) + @mock.patch("cloudinit.util.is_BSD", autospec=True) + @mock.patch("cloudinit.util.subp.subp") + @mock.patch("cloudinit.temp_utils.tempdir", autospec=True) + def test_normalize_mtype_on_bsd( + self, m_tmpdir, m_subp, m_is_BSD, m_is_Linux, mtype, expected + ): + m_is_BSD.return_value = True + m_is_Linux.return_value = False + m_tmpdir.return_value.__enter__ = mock.Mock( + autospec=True, return_value="/tmp/fake" + ) + m_tmpdir.return_value.__exit__ = mock.Mock( + autospec=True, return_value=True + ) + callback = mock.Mock(autospec=True) + + util.mount_cb('/dev/fake0', callback, mtype=mtype) + assert mock.call( + ["mount", "-o", "ro", "-t", expected, "/dev/fake0", "/tmp/fake"], + update_env=None) in m_subp.call_args_list + @pytest.mark.parametrize("invalid_mtype", [int(0), float(0.0), dict()]) def test_typeerror_raised_for_invalid_mtype(self, invalid_mtype): with pytest.raises(TypeError): @@ -771,4 +806,49 @@ class TestMountCb: ] == callback.call_args_list +@mock.patch("cloudinit.util.write_file") +class TestEnsureFile: + """Tests for ``cloudinit.util.ensure_file``.""" + + def test_parameters_passed_through(self, m_write_file): + """Test the parameters in the signature are passed to write_file.""" + util.ensure_file( + mock.sentinel.path, + mode=mock.sentinel.mode, + preserve_mode=mock.sentinel.preserve_mode, + ) + + assert 1 == m_write_file.call_count + args, kwargs = m_write_file.call_args + assert (mock.sentinel.path,) == args + assert mock.sentinel.mode == kwargs["mode"] + assert mock.sentinel.preserve_mode == kwargs["preserve_mode"] + + @pytest.mark.parametrize( + "kwarg,expected", + [ + # Files should be world-readable by default + ("mode", 0o644), + # The previous behaviour of not preserving mode should be retained + ("preserve_mode", False), + ], + ) + def test_defaults(self, m_write_file, kwarg, expected): + """Test that ensure_file defaults appropriately.""" + util.ensure_file(mock.sentinel.path) + + assert 1 == m_write_file.call_count + _args, kwargs = m_write_file.call_args + assert expected == kwargs[kwarg] + + def test_static_parameters_are_passed(self, m_write_file): + """Test that the static write_files parameters are passed correctly.""" + util.ensure_file(mock.sentinel.path) + + assert 1 == m_write_file.call_count + _args, kwargs = m_write_file.call_args + assert "" == kwargs["content"] + assert "ab" == 
kwargs["omode"] + + # vi: ts=4 expandtab diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index f234b962..1317e063 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -26,7 +26,6 @@ LOG = logging.getLogger(__name__) NOT_MULTIPART_TYPE = handlers.NOT_MULTIPART_TYPE PART_FN_TPL = handlers.PART_FN_TPL OCTET_TYPE = handlers.OCTET_TYPE -INCLUDE_MAP = handlers.INCLUSION_TYPES_MAP # Saves typing errors CONTENT_TYPE = 'Content-Type' diff --git a/cloudinit/util.py b/cloudinit/util.py index cf9e349f..769f3425 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -62,12 +62,6 @@ TRUE_STRINGS = ('true', '1', 'on', 'yes') FALSE_STRINGS = ('off', '0', 'no', 'false') -# Helper utils to see if running in a container -CONTAINER_TESTS = (['systemd-detect-virt', '--quiet', '--container'], - ['running-in-container'], - ['lxc-is-container']) - - def kernel_version(): return tuple(map(int, os.uname().release.split('.')[:2])) @@ -159,32 +153,6 @@ def fully_decoded_payload(part): return cte_payload -# Path for DMI Data -DMI_SYS_PATH = "/sys/class/dmi/id" - -# dmidecode and /sys/class/dmi/id/* use different names for the same value, -# this allows us to refer to them by one canonical name -DMIDECODE_TO_DMI_SYS_MAPPING = { - 'baseboard-asset-tag': 'board_asset_tag', - 'baseboard-manufacturer': 'board_vendor', - 'baseboard-product-name': 'board_name', - 'baseboard-serial-number': 'board_serial', - 'baseboard-version': 'board_version', - 'bios-release-date': 'bios_date', - 'bios-vendor': 'bios_vendor', - 'bios-version': 'bios_version', - 'chassis-asset-tag': 'chassis_asset_tag', - 'chassis-manufacturer': 'chassis_vendor', - 'chassis-serial-number': 'chassis_serial', - 'chassis-version': 'chassis_version', - 'system-manufacturer': 'sys_vendor', - 'system-product-name': 'product_name', - 'system-serial-number': 'product_serial', - 'system-uuid': 'product_uuid', - 'system-version': 'product_version', -} - - class SeLinuxGuard(object): def __init__(self, path, recursive=False): # Late import since it might not always @@ -418,6 +386,11 @@ def multi_log(text, console=True, stderr=True, @lru_cache() +def is_Linux(): + return 'Linux' in platform.system() + + +@lru_cache() def is_BSD(): return 'BSD' in platform.system() @@ -761,8 +734,9 @@ def del_dir(path): # 'meta-data' entries def read_optional_seed(fill, base="", ext="", timeout=5): try: - (md, ud) = read_seeded(base, ext, timeout) + (md, ud, vd) = read_seeded(base, ext, timeout) fill['user-data'] = ud + fill['vendor-data'] = vd fill['meta-data'] = md return True except url_helper.UrlError as e: @@ -840,9 +814,11 @@ def load_yaml(blob, default=None, allowed=(dict,)): def read_seeded(base="", ext="", timeout=5, retries=10, file_retries=0): if base.find("%s") >= 0: ud_url = base % ("user-data" + ext) + vd_url = base % ("vendor-data" + ext) md_url = base % ("meta-data" + ext) else: ud_url = "%s%s%s" % (base, "user-data", ext) + vd_url = "%s%s%s" % (base, "vendor-data", ext) md_url = "%s%s%s" % (base, "meta-data", ext) md_resp = url_helper.read_file_or_url(md_url, timeout=timeout, @@ -857,7 +833,19 @@ def read_seeded(base="", ext="", timeout=5, retries=10, file_retries=0): if ud_resp.ok(): ud = ud_resp.contents - return (md, ud) + vd = None + try: + vd_resp = url_helper.read_file_or_url(vd_url, timeout=timeout, + retries=retries) + except url_helper.UrlError as e: + LOG.debug("Error in vendor-data response: %s", e) + else: + if vd_resp.ok(): + vd = vd_resp.contents + else: + LOG.debug("Error in vendor-data response") + + return (md, ud, vd) def 
read_conf_d(confd): @@ -1646,16 +1634,17 @@ def mount_cb(device, callback, data=None, mtype=None, _type=type(mtype))) # clean up 'mtype' input a bit based on platform. - platsys = platform.system().lower() - if platsys == "linux": + if is_Linux(): if mtypes is None: mtypes = ["auto"] - elif platsys.endswith("bsd"): + elif is_BSD(): if mtypes is None: - mtypes = ['ufs', 'cd9660', 'vfat'] + mtypes = ['ufs', 'cd9660', 'msdos'] for index, mtype in enumerate(mtypes): if mtype == "iso9660": mtypes[index] = "cd9660" + if mtype in ["vfat", "msdosfs"]: + mtypes[index] = "msdos" else: # we cannot do a smart "auto", so just call 'mount' once with no -t mtypes = [''] @@ -1789,8 +1778,12 @@ def append_file(path, content): write_file(path, content, omode="ab", mode=None) -def ensure_file(path, mode=0o644): - write_file(path, content='', omode="ab", mode=mode) +def ensure_file( + path, mode: int = 0o644, *, preserve_mode: bool = False +) -> None: + write_file( + path, content='', omode="ab", mode=mode, preserve_mode=preserve_mode + ) def safe_int(possible_int): @@ -1929,19 +1922,52 @@ def strip_prefix_suffix(line, prefix=None, suffix=None): return line +def _cmd_exits_zero(cmd): + if subp.which(cmd[0]) is None: + return False + try: + subp.subp(cmd) + except subp.ProcessExecutionError: + return False + return True + + +def _is_container_systemd(): + return _cmd_exits_zero(["systemd-detect-virt", "--quiet", "--container"]) + + +def _is_container_upstart(): + return _cmd_exits_zero(["running-in-container"]) + + +def _is_container_old_lxc(): + return _cmd_exits_zero(["lxc-is-container"]) + + +def _is_container_freebsd(): + if not is_FreeBSD(): + return False + cmd = ["sysctl", "-qn", "security.jail.jailed"] + if subp.which(cmd[0]) is None: + return False + out, _ = subp.subp(cmd) + return out.strip() == "1" + + +@lru_cache() def is_container(): """ Checks to see if this code running in a container of some sort """ - - for helper in CONTAINER_TESTS: - try: - # try to run a helper program. if it returns true/zero - # then we're inside a container. otherwise, no - subp.subp(helper) + checks = ( + _is_container_systemd, + _is_container_freebsd, + _is_container_upstart, + _is_container_old_lxc) + + for helper in checks: + if helper(): return True - except (IOError, OSError): - pass # this code is largely from the logic in # ubuntu's /etc/init/container-detect.conf @@ -2396,57 +2422,6 @@ def human2bytes(size): return int(num * mpliers[mplier]) -def _read_dmi_syspath(key): - """ - Reads dmi data with from /sys/class/dmi/id - """ - if key not in DMIDECODE_TO_DMI_SYS_MAPPING: - return None - mapped_key = DMIDECODE_TO_DMI_SYS_MAPPING[key] - dmi_key_path = "{0}/{1}".format(DMI_SYS_PATH, mapped_key) - LOG.debug("querying dmi data %s", dmi_key_path) - try: - if not os.path.exists(dmi_key_path): - LOG.debug("did not find %s", dmi_key_path) - return None - - key_data = load_file(dmi_key_path, decode=False) - if not key_data: - LOG.debug("%s did not return any data", dmi_key_path) - return None - - # uninitialized dmi values show as all \xff and /sys appends a '\n'. - # in that event, return a string of '.' in the same length. - if key_data == b'\xff' * (len(key_data) - 1) + b'\n': - key_data = b"" - - str_data = key_data.decode('utf8').strip() - LOG.debug("dmi data %s returned %s", dmi_key_path, str_data) - return str_data - - except Exception: - logexc(LOG, "failed read of %s", dmi_key_path) - return None - - -def _call_dmidecode(key, dmidecode_path): - """ - Calls out to dmidecode to get the data out. 
This is mostly for supporting
-    OS's without /sys/class/dmi/id support.
-    """
-    try:
-        cmd = [dmidecode_path, "--string", key]
-        (result, _err) = subp.subp(cmd)
-        result = result.strip()
-        LOG.debug("dmidecode returned '%s' for '%s'", result, key)
-        if result.replace(".", "") == "":
-            return ""
-        return result
-    except (IOError, OSError) as e:
-        LOG.debug('failed dmidecode cmd: %s\n%s', cmd, e)
-        return None
-
-
 def is_x86(uname_arch=None):
     """Return True if platform is x86-based"""
     if uname_arch is None:
@@ -2457,49 +2432,6 @@ def is_x86(uname_arch=None):
     return x86_arch_match


-def read_dmi_data(key):
-    """
-    Wrapper for reading DMI data.
-
-    If running in a container return None. This is because DMI data is
-    assumed to be not useful in a container as it does not represent the
-    container but rather the host.
-
-    This will do the following (returning the first that produces a
-    result):
-    1) Use a mapping to translate `key` from dmidecode naming to
-       sysfs naming and look in /sys/class/dmi/... for a value.
-    2) Use `key` as a sysfs key directly and look in /sys/class/dmi/...
-    3) Fall-back to passing `key` to `dmidecode --string`.
-
-    If all of the above fail to find a value, None will be returned.
-    """
-
-    if is_container():
-        return None
-
-    syspath_value = _read_dmi_syspath(key)
-    if syspath_value is not None:
-        return syspath_value
-
-    # running dmidecode can be problematic on some arches (LP: #1243287)
-    uname_arch = os.uname()[4]
-    if not (is_x86(uname_arch) or
-            uname_arch == 'aarch64' or
-            uname_arch == 'amd64'):
-        LOG.debug("dmidata is not supported on %s", uname_arch)
-        return None
-
-    dmidecode_path = subp.which('dmidecode')
-    if dmidecode_path:
-        return _call_dmidecode(key, dmidecode_path)
-
-    LOG.warning("did not find either path %s or dmidecode command",
-                DMI_SYS_PATH)
-    return None
-
-
 def message_from_string(string):
     if sys.version_info[:2] < (2, 7):
         return email.message_from_file(io.StringIO(string))
diff --git a/cloudinit/version.py b/cloudinit/version.py
index 8560d087..f25e9145 100644
--- a/cloudinit/version.py
+++ b/cloudinit/version.py
@@ -4,7 +4,7 @@
 #
 # This file is part of cloud-init. See LICENSE file for license information.

-__VERSION__ = "20.3"
+__VERSION__ = "20.4"
 _PACKAGED_VERSION = '@@PACKAGED_VERSION@@'

 FEATURES = [
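
The cloudinit/tests/test_dmi.py additions at the top of this section exercise read_dmi_data, which this commit relocates from util.py into the new cloudinit.dmi module (the util.py hunk above shows the removal). A minimal usage sketch, assuming the post-move layout from this diff:

from cloudinit import dmi

# Consults /sys/class/dmi/id first, then falls back to
# `dmidecode --string` on x86/aarch64 (kenv on FreeBSD), and
# returns None when running inside a container.
product = dmi.read_dmi_data("system-product-name")
print(product)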
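
The test_persistence.py docstrings in this diff describe the contract of CloudInitPickleMixin without showing it. Below is a minimal sketch of that versioned-unpickle protocol, reconstructed only from the behaviours the tests assert; the real mixin lives in cloudinit/persistence.py and may differ in detail:

import pickle


class PickleMixinSketch:
    """Version instance pickles and give subclasses an _unpickle hook."""

    _ci_pkl_version = 0  # subclasses bump this when their state changes

    def __getstate__(self):
        # Persist the *class* version, overriding any instance attribute
        # of the same name (InstanceVersionNotUsed checks this).
        state = self.__dict__.copy()
        state["_ci_pkl_version"] = type(self)._ci_pkl_version
        return state

    def __setstate__(self, state: dict) -> None:
        # Pop the version so it is not restored into instance state
        # (VersionIsPoppedFromState); default to 0 for old dumps that
        # never stored a version (MissingVersionHandled).
        version = state.pop("_ci_pkl_version", 0)
        self.__dict__.update(state)  # restore state before _unpickle
        self._unpickle(version)

    def _unpickle(self, ci_pkl_version: int) -> None:
        """No-op default so subclasses need not override it."""


class Example(PickleMixinSketch):
    _ci_pkl_version = 1

    def __init__(self):
        self.some_state = "some state"

    def _unpickle(self, ci_pkl_version: int) -> None:
        if ci_pkl_version < 1:
            # hypothetical migration of pre-version-1 state
            self.some_state = getattr(self, "some_state", "migrated")


assert pickle.loads(pickle.dumps(Example())).some_state == "some state"

As the module docstring notes, classes must sit at module top level to be picklable, which the sketch respects.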
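
The new preserve_mode keyword on util.ensure_file (diffed above) is what lets the stages test test_existing_file_permissions_are_not_modified pass: an existing log file's mode is no longer reset on every boot (LP: #1900837). A usage sketch; the path is illustrative:

from cloudinit import util

# Created with mode 0o644 if missing; if the file already exists,
# preserve_mode=True leaves its current mode untouched.
util.ensure_file("/tmp/demo-cloud-init.log", mode=0o644, preserve_mode=True)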
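
The mount_cb hunk above normalizes mount types for BSD, where mount(8) uses different filesystem names than Linux; test_normalize_mtype_on_bsd pins this down. A hypothetical standalone restatement of that normalization (cloud-init performs it inline in mount_cb; only the BSD branch is shown):

def normalize_bsd_mtypes(mtypes=None):
    if mtypes is None:
        # BSD mount has no "auto", so try common filesystems in order
        mtypes = ['ufs', 'cd9660', 'msdos']
    normalized = []
    for mtype in mtypes:
        if mtype == "iso9660":
            normalized.append("cd9660")  # BSD name for the ISO filesystem
        elif mtype in ("vfat", "msdosfs"):
            normalized.append("msdos")   # BSD name for FAT variants
        else:
            normalized.append(mtype)
    return normalized


assert normalize_bsd_mtypes(["iso9660", "vfat", "ufs"]) == \
    ["cd9660", "msdos", "ufs"]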
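
read_seeded and read_optional_seed (diffed above) now fetch vendor-data alongside meta-data and user-data. A sketch of the new three-tuple against a throwaway NoCloud-style seed directory, assuming read_file_or_url's file:// handling; vendor-data comes back as None when the seed has no vendor-data file:

import pathlib
import tempfile

from cloudinit import util

# Throwaway seed directory with only meta-data and user-data.
seed = pathlib.Path(tempfile.mkdtemp())
(seed / "meta-data").write_text("instance-id: demo\n")
(seed / "user-data").write_text("#cloud-config\n{}\n")

md, ud, vd = util.read_seeded("file://%s/" % seed)
assert md == {"instance-id": "demo"}
assert vd is None  # missing vendor-data is logged, not fatal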
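
Finally, the is_container rewrite above replaces the old CONTAINER_TESTS command table with named probes (adding a FreeBSD jail check via sysctl) and memoizes the result, so the probe commands run at most once per process. Trivial usage:

from cloudinit import util

# Result is cached by lru_cache after the first call.
if util.is_container():
    print("container detected; host-level DMI data will not be trusted")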