diff options
author | James Falcon <therealfalcon@gmail.com> | 2021-07-19 14:13:21 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-19 14:13:21 -0500 |
commit | 184c836a16e9954a2cba11ae21f07923077ec904 (patch) | |
tree | 6289d70e4f833d300a25136dde6a56fcd1b0a0dc | |
parent | eacb0353803263934aa2ac827c37e461c87cb107 (diff) | |
download | vyos-cloud-init-184c836a16e9954a2cba11ae21f07923077ec904.tar.gz vyos-cloud-init-184c836a16e9954a2cba11ae21f07923077ec904.zip |
Initial hotplug support (#936)
Adds a udev script which will invoke a hotplug hook script on all net
add events. The script will write some udev arguments to a systemd FIFO
socket (to ensure we have only one instance of cloud-init running at a
time), which is then read by a new service that calls a new 'cloud-init
devel hotplug-hook' command to handle the new event.
This hotplug-hook command will:
- Fetch the pickled datasource
- Verify that the hotplug event is supported/enabled
- Update the metadata for the datasource
- Ensure the hotplugged device exists within the datasource
- Apply the config change on the datasource metadata
- Bring up the new interface (or apply global network configuration)
- Save the updated metadata back to the pickle cache
Also adds some unrelated type annotations where helpful
23 files changed, 906 insertions, 91 deletions
diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init index a9577e9d..b9f137b1 100644 --- a/bash_completion/cloud-init +++ b/bash_completion/cloud-init @@ -28,7 +28,7 @@ _cloudinit_complete() COMPREPLY=($(compgen -W "--help --tarfile --include-userdata" -- $cur_word)) ;; devel) - COMPREPLY=($(compgen -W "--help schema net-convert" -- $cur_word)) + COMPREPLY=($(compgen -W "--help hotplug-hook schema net-convert" -- $cur_word)) ;; dhclient-hook) COMPREPLY=($(compgen -W "--help up down" -- $cur_word)) @@ -64,6 +64,9 @@ _cloudinit_complete() --frequency) COMPREPLY=($(compgen -W "--help instance always once" -- $cur_word)) ;; + hotplug-hook) + COMPREPLY=($(compgen -W "--help" -- $cur_word)) + ;; net-convert) COMPREPLY=($(compgen -W "--help --network-data --kind --directory --output-kind" -- $cur_word)) ;; diff --git a/cloudinit/cmd/devel/hotplug_hook.py b/cloudinit/cmd/devel/hotplug_hook.py new file mode 100644 index 00000000..0282f24a --- /dev/null +++ b/cloudinit/cmd/devel/hotplug_hook.py @@ -0,0 +1,236 @@ +# This file is part of cloud-init. See LICENSE file for license information. +"""Handle reconfiguration on hotplug events""" +import abc +import argparse +import os +import time + +from cloudinit import log +from cloudinit import reporting +from cloudinit.event import EventScope, EventType +from cloudinit.net import activators, read_sys_net_safe +from cloudinit.net.network_state import parse_net_config_data +from cloudinit.reporting import events +from cloudinit.stages import Init +from cloudinit.sources import DataSource + + +LOG = log.getLogger(__name__) +NAME = 'hotplug-hook' + + +def get_parser(parser=None): + """Build or extend an arg parser for hotplug-hook utility. + + @param parser: Optional existing ArgumentParser instance representing the + subcommand which will be extended to support the args of this utility. + + @returns: ArgumentParser with proper argument configuration. 
+ """ + if not parser: + parser = argparse.ArgumentParser(prog=NAME, description=__doc__) + + parser.description = __doc__ + parser.add_argument("-d", "--devpath", required=True, + metavar="PATH", + help="sysfs path to hotplugged device") + parser.add_argument("-s", "--subsystem", required=True, + help="subsystem to act on", + choices=['net']) + parser.add_argument("-u", "--udevaction", required=True, + help="action to take", + choices=['add', 'remove']) + + return parser + + +class UeventHandler(abc.ABC): + def __init__(self, id, datasource, devpath, action, success_fn): + self.id = id + self.datasource = datasource # type: DataSource + self.devpath = devpath + self.action = action + self.success_fn = success_fn + + @abc.abstractmethod + def apply(self): + raise NotImplementedError() + + @property + @abc.abstractmethod + def config(self): + raise NotImplementedError() + + @abc.abstractmethod + def device_detected(self) -> bool: + raise NotImplementedError() + + def detect_hotplugged_device(self): + detect_presence = None + if self.action == 'add': + detect_presence = True + elif self.action == 'remove': + detect_presence = False + else: + raise ValueError('Unknown action: %s' % self.action) + + if detect_presence != self.device_detected(): + raise RuntimeError( + 'Failed to detect %s in updated metadata' % self.id) + + def success(self): + return self.success_fn() + + def update_metadata(self): + result = self.datasource.update_metadata_if_supported([ + EventType.HOTPLUG]) + if not result: + raise RuntimeError( + 'Datasource %s not updated for ' + 'event %s' % (self.datasource, EventType.HOTPLUG) + ) + return result + + +class NetHandler(UeventHandler): + def __init__(self, datasource, devpath, action, success_fn): + # convert devpath to mac address + id = read_sys_net_safe(os.path.basename(devpath), 'address') + super().__init__(id, datasource, devpath, action, success_fn) + + def apply(self): + self.datasource.distro.apply_network_config( + self.config, + 
bring_up=False, + ) + interface_name = os.path.basename(self.devpath) + activator = activators.select_activator() + if self.action == 'add': + if not activator.bring_up_interface(interface_name): + raise RuntimeError( + 'Failed to bring up device: {}'.format(self.devpath)) + elif self.action == 'remove': + if not activator.bring_down_interface(interface_name): + raise RuntimeError( + 'Failed to bring down device: {}'.format(self.devpath)) + + @property + def config(self): + return self.datasource.network_config + + def device_detected(self) -> bool: + netstate = parse_net_config_data(self.config) + found = [ + iface for iface in netstate.iter_interfaces() + if iface.get('mac_address') == self.id + ] + LOG.debug('Ifaces with ID=%s : %s', self.id, found) + return len(found) > 0 + + +SUBSYSTEM_PROPERTES_MAP = { + 'net': (NetHandler, EventScope.NETWORK), +} + + +def handle_hotplug( + hotplug_init: Init, devpath, subsystem, udevaction +): + handler_cls, event_scope = SUBSYSTEM_PROPERTES_MAP.get( + subsystem, (None, None) + ) + if handler_cls is None: + raise Exception( + 'hotplug-hook: cannot handle events for subsystem: {}'.format( + subsystem)) + + LOG.debug('Fetching datasource') + datasource = hotplug_init.fetch(existing="trust") + + if not hotplug_init.update_event_enabled( + event_source_type=EventType.HOTPLUG, + scope=EventScope.NETWORK + ): + LOG.debug('hotplug not enabled for event of type %s', event_scope) + return + + LOG.debug('Creating %s event handler', subsystem) + event_handler = handler_cls( + datasource=datasource, + devpath=devpath, + action=udevaction, + success_fn=hotplug_init._write_to_cache + ) # type: UeventHandler + wait_times = [1, 3, 5, 10, 30] + for attempt, wait in enumerate(wait_times): + LOG.debug( + 'subsystem=%s update attempt %s/%s', + subsystem, + attempt, + len(wait_times) + ) + try: + LOG.debug('Refreshing metadata') + event_handler.update_metadata() + LOG.debug('Detecting device in updated metadata') + 
event_handler.detect_hotplugged_device() + LOG.debug('Applying config change') + event_handler.apply() + LOG.debug('Updating cache') + event_handler.success() + break + except Exception as e: + LOG.debug('Exception while processing hotplug event. %s', e) + time.sleep(wait) + last_exception = e + else: + raise last_exception # type: ignore + + +def handle_args(name, args): + # Note that if an exception happens between now and when logging is + # setup, we'll only see it in the journal + hotplug_reporter = events.ReportEventStack( + name, __doc__, reporting_enabled=True + ) + + hotplug_init = Init(ds_deps=[], reporter=hotplug_reporter) + hotplug_init.read_cfg() + + log.setupLogging(hotplug_init.cfg) + if 'reporting' in hotplug_init.cfg: + reporting.update_configuration(hotplug_init.cfg.get('reporting')) + + # Logging isn't going to be setup until now + LOG.debug( + '%s called with the following arguments: {udevaction: %s, ' + 'subsystem: %s, devpath: %s}', + name, args.udevaction, args.subsystem, args.devpath + ) + LOG.debug( + '%s called with the following arguments:\n' + 'udevaction: %s\n' + 'subsystem: %s\n' + 'devpath: %s', + name, args.udevaction, args.subsystem, args.devpath + ) + + with hotplug_reporter: + try: + handle_hotplug( + hotplug_init=hotplug_init, + devpath=args.devpath, + subsystem=args.subsystem, + udevaction=args.udevaction, + ) + except Exception: + LOG.exception('Received fatal exception handling hotplug!') + raise + + LOG.debug('Exiting hotplug handler') + reporting.flush_events() + + +if __name__ == '__main__': + args = get_parser().parse_args() + handle_args(NAME, args) diff --git a/cloudinit/cmd/devel/parser.py b/cloudinit/cmd/devel/parser.py index 1a3c46a4..be304630 100644 --- a/cloudinit/cmd/devel/parser.py +++ b/cloudinit/cmd/devel/parser.py @@ -7,6 +7,7 @@ import argparse from cloudinit.config import schema +from . import hotplug_hook from . import net_convert from . import render from . 
import make_mime @@ -21,6 +22,8 @@ def get_parser(parser=None): subparsers.required = True subcmds = [ + (hotplug_hook.NAME, hotplug_hook.__doc__, + hotplug_hook.get_parser, hotplug_hook.handle_args), ('schema', 'Validate cloud-config files for document schema', schema.get_parser, schema.handle_schema_args), (net_convert.NAME, net_convert.__doc__, diff --git a/cloudinit/distros/__init__.py b/cloudinit/distros/__init__.py index 2caa8bc2..7bdf2197 100755 --- a/cloudinit/distros/__init__.py +++ b/cloudinit/distros/__init__.py @@ -206,8 +206,15 @@ class Distro(persistence.CloudInitPickleMixin, metaclass=abc.ABCMeta): def generate_fallback_config(self): return net.generate_fallback_config() - def apply_network_config(self, netconfig, bring_up=False): - # apply network config netconfig + def apply_network_config(self, netconfig, bring_up=False) -> bool: + """Apply the network config. + + If bring_up is True, attempt to bring up the passed in devices. If + devices is None, attempt to bring up devices returned by + _write_network_config. + + Returns True if any devices failed to come up, otherwise False. + """ # This method is preferred to apply_network which only takes # a much less complete network config format (interfaces(5)). 
network_state = parse_net_config_data(netconfig) diff --git a/cloudinit/event.py b/cloudinit/event.py index 76a0afc6..53ad4c25 100644 --- a/cloudinit/event.py +++ b/cloudinit/event.py @@ -29,6 +29,7 @@ class EventType(Enum): BOOT = "boot" BOOT_NEW_INSTANCE = "boot-new-instance" BOOT_LEGACY = "boot-legacy" + HOTPLUG = 'hotplug' def __str__(self): # pylint: disable=invalid-str-returned return self.value diff --git a/cloudinit/net/activators.py b/cloudinit/net/activators.py index 34fee3bf..84aaafc9 100644 --- a/cloudinit/net/activators.py +++ b/cloudinit/net/activators.py @@ -15,31 +15,80 @@ from cloudinit.net.sysconfig import NM_CFG_FILE LOG = logging.getLogger(__name__) +def _alter_interface(cmd, device_name) -> bool: + LOG.debug("Attempting command %s for device %s", cmd, device_name) + try: + (_out, err) = subp.subp(cmd) + if len(err): + LOG.warning("Running %s resulted in stderr output: %s", + cmd, err) + return True + except subp.ProcessExecutionError: + util.logexc(LOG, "Running interface command %s failed", cmd) + return False + + class NetworkActivator(ABC): @staticmethod @abstractmethod def available() -> bool: + """Return True if activator is available, otherwise return False.""" raise NotImplementedError() @staticmethod @abstractmethod def bring_up_interface(device_name: str) -> bool: + """Bring up interface. + + Return True is successful, otherwise return False + """ + raise NotImplementedError() + + @staticmethod + @abstractmethod + def bring_down_interface(device_name: str) -> bool: + """Bring down interface. + + Return True is successful, otherwise return False + """ raise NotImplementedError() @classmethod def bring_up_interfaces(cls, device_names: Iterable[str]) -> bool: - all_succeeded = True - for device in device_names: - if not cls.bring_up_interface(device): - all_succeeded = False - return all_succeeded + """Bring up specified list of interfaces. 
+ + Return True is successful, otherwise return False + """ + return all(cls.bring_up_interface(device) for device in device_names) @classmethod def bring_up_all_interfaces(cls, network_state: NetworkState) -> bool: + """Bring up all interfaces. + + Return True is successful, otherwise return False + """ return cls.bring_up_interfaces( [i['name'] for i in network_state.iter_interfaces()] ) + @classmethod + def bring_down_interfaces(cls, device_names: Iterable[str]) -> bool: + """Bring down specified list of interfaces. + + Return True is successful, otherwise return False + """ + return all(cls.bring_down_interface(device) for device in device_names) + + @classmethod + def bring_down_all_interfaces(cls, network_state: NetworkState) -> bool: + """Bring down all interfaces. + + Return True is successful, otherwise return False + """ + return cls.bring_down_interfaces( + [i['name'] for i in network_state.iter_interfaces()] + ) + class IfUpDownActivator(NetworkActivator): # Note that we're not overriding bring_up_interfaces to pass something @@ -53,24 +102,27 @@ class IfUpDownActivator(NetworkActivator): @staticmethod def bring_up_interface(device_name: str) -> bool: - """Bring up interface using ifup.""" + """Bring up interface using ifup. + + Return True is successful, otherwise return False + """ cmd = ['ifup', device_name] - LOG.debug("Attempting to run bring up interface %s using command %s", - device_name, cmd) - try: - (_out, err) = subp.subp(cmd) - if len(err): - LOG.warning("Running %s resulted in stderr output: %s", - cmd, err) - return True - except subp.ProcessExecutionError: - util.logexc(LOG, "Running interface command %s failed", cmd) - return False + return _alter_interface(cmd, device_name) + + @staticmethod + def bring_down_interface(device_name: str) -> bool: + """Bring up interface using ifup. 
+ + Return True is successful, otherwise return False + """ + cmd = ['ifdown', device_name] + return _alter_interface(cmd, device_name) class NetworkManagerActivator(NetworkActivator): @staticmethod def available(target=None) -> bool: + """ Return true if network manager can be used on this system.""" config_present = os.path.isfile( subp.target_path(target, path=NM_CFG_FILE) ) @@ -79,44 +131,86 @@ class NetworkManagerActivator(NetworkActivator): @staticmethod def bring_up_interface(device_name: str) -> bool: - try: - subp.subp(['nmcli', 'connection', 'up', device_name]) - except subp.ProcessExecutionError: - util.logexc(LOG, "nmcli failed to bring up {}".format(device_name)) - return False - return True + """Bring up interface using nmcli. + + Return True is successful, otherwise return False + """ + cmd = ['nmcli', 'connection', 'up', 'ifname', device_name] + return _alter_interface(cmd, device_name) + + @staticmethod + def bring_down_interface(device_name: str) -> bool: + """Bring down interface using nmcli. + + Return True is successful, otherwise return False + """ + cmd = ['nmcli', 'connection', 'down', device_name] + return _alter_interface(cmd, device_name) class NetplanActivator(NetworkActivator): + NETPLAN_CMD = ['netplan', 'apply'] + @staticmethod def available(target=None) -> bool: + """ Return true if netplan can be used on this system.""" return netplan_available(target=target) @staticmethod - def _apply_netplan(): - LOG.debug('Applying current netplan config') - try: - subp.subp(['netplan', 'apply'], capture=True) - except subp.ProcessExecutionError: - util.logexc(LOG, "netplan apply failed") - return False - return True - - @staticmethod def bring_up_interface(device_name: str) -> bool: + """Apply netplan config. 
+ + Return True is successful, otherwise return False + """ LOG.debug("Calling 'netplan apply' rather than " - "bringing up individual interfaces") - return NetplanActivator._apply_netplan() + "altering individual interfaces") + return _alter_interface(NetplanActivator.NETPLAN_CMD, 'all') @staticmethod def bring_up_interfaces(device_names: Iterable[str]) -> bool: + """Apply netplan config. + + Return True is successful, otherwise return False + """ LOG.debug("Calling 'netplan apply' rather than " - "bringing up individual interfaces") - return NetplanActivator._apply_netplan() + "altering individual interfaces") + return _alter_interface(NetplanActivator.NETPLAN_CMD, 'all') @staticmethod def bring_up_all_interfaces(network_state: NetworkState) -> bool: - return NetplanActivator._apply_netplan() + """Apply netplan config. + + Return True is successful, otherwise return False + """ + return _alter_interface(NetplanActivator.NETPLAN_CMD, 'all') + + @staticmethod + def bring_down_interface(device_name: str) -> bool: + """Apply netplan config. + + Return True is successful, otherwise return False + """ + LOG.debug("Calling 'netplan apply' rather than " + "altering individual interfaces") + return _alter_interface(NetplanActivator.NETPLAN_CMD, 'all') + + @staticmethod + def bring_down_interfaces(device_names: Iterable[str]) -> bool: + """Apply netplan config. + + Return True is successful, otherwise return False + """ + LOG.debug("Calling 'netplan apply' rather than " + "altering individual interfaces") + return _alter_interface(NetplanActivator.NETPLAN_CMD, 'all') + + @staticmethod + def bring_down_all_interfaces(network_state: NetworkState) -> bool: + """Apply netplan config. + + Return True is successful, otherwise return False + """ + return _alter_interface(NetplanActivator.NETPLAN_CMD, 'all') # This section is mostly copied and pasted from renderers.py. 
An abstract @@ -153,4 +247,6 @@ def select_activator(priority=None, target=None) -> Type[NetworkActivator]: raise RuntimeError( "No available network activators found%s. Searched " "through list: %s" % (tmsg, priority)) - return found[0] + selected = found[0] + LOG.debug('Using selected activator: %s', selected) + return selected diff --git a/cloudinit/sources/DataSourceConfigDrive.py b/cloudinit/sources/DataSourceConfigDrive.py index 62756cf7..19c8d126 100644 --- a/cloudinit/sources/DataSourceConfigDrive.py +++ b/cloudinit/sources/DataSourceConfigDrive.py @@ -12,9 +12,8 @@ from cloudinit import log as logging from cloudinit import sources from cloudinit import subp from cloudinit import util - +from cloudinit.event import EventScope, EventType from cloudinit.net import eni - from cloudinit.sources.DataSourceIBMCloud import get_ibm_platform from cloudinit.sources.helpers import openstack @@ -37,6 +36,13 @@ class DataSourceConfigDrive(openstack.SourceMixin, sources.DataSource): dsname = 'ConfigDrive' + supported_update_events = {EventScope.NETWORK: { + EventType.BOOT_NEW_INSTANCE, + EventType.BOOT, + EventType.BOOT_LEGACY, + EventType.HOTPLUG, + }} + def __init__(self, sys_cfg, distro, paths): super(DataSourceConfigDrive, self).__init__(sys_cfg, distro, paths) self.source = None diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 8a7f7c60..700437b0 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -76,6 +76,13 @@ class DataSourceEc2(sources.DataSource): # Whether we want to get network configuration from the metadata service. 
perform_dhcp_setup = False + supported_update_events = {EventScope.NETWORK: { + EventType.BOOT_NEW_INSTANCE, + EventType.BOOT, + EventType.BOOT_LEGACY, + EventType.HOTPLUG, + }} + def __init__(self, sys_cfg, distro, paths): super(DataSourceEc2, self).__init__(sys_cfg, distro, paths) self.metadata_address = None diff --git a/cloudinit/sources/DataSourceOpenStack.py b/cloudinit/sources/DataSourceOpenStack.py index 619a171e..a85b71d7 100644 --- a/cloudinit/sources/DataSourceOpenStack.py +++ b/cloudinit/sources/DataSourceOpenStack.py @@ -8,11 +8,11 @@ import time from cloudinit import dmi from cloudinit import log as logging -from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError from cloudinit import sources from cloudinit import url_helper from cloudinit import util - +from cloudinit.event import EventScope, EventType +from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError from cloudinit.sources.helpers import openstack from cloudinit.sources import DataSourceOracle as oracle @@ -46,6 +46,13 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource): # Whether we want to get network configuration from the metadata service. 
perform_dhcp_setup = False + supported_update_events = {EventScope.NETWORK: { + EventType.BOOT_NEW_INSTANCE, + EventType.BOOT, + EventType.BOOT_LEGACY, + EventType.HOTPLUG + }} + def __init__(self, sys_cfg, distro, paths): super(DataSourceOpenStack, self).__init__(sys_cfg, distro, paths) self.metadata_address = None diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 9d25b0ee..bf6bf139 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -23,6 +23,7 @@ from cloudinit import type_utils from cloudinit import user_data as ud from cloudinit import util from cloudinit.atomic_helper import write_json +from cloudinit.distros import Distro from cloudinit.event import EventScope, EventType from cloudinit.filters import launch_index from cloudinit.persistence import CloudInitPickleMixin @@ -215,7 +216,7 @@ class DataSource(CloudInitPickleMixin, metaclass=abc.ABCMeta): _ci_pkl_version = 1 - def __init__(self, sys_cfg, distro, paths, ud_proc=None): + def __init__(self, sys_cfg, distro: Distro, paths, ud_proc=None): self.sys_cfg = sys_cfg self.distro = distro self.paths = paths diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 06e0d9b1..bc164fa0 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -241,7 +241,7 @@ class Init(object): else: return (None, "cache invalid in datasource: %s" % ds) - def _get_data_source(self, existing): + def _get_data_source(self, existing) -> sources.DataSource: if self.datasource is not NULL_DATA_SOURCE: return self.datasource @@ -267,7 +267,7 @@ class Init(object): cfg_list, pkg_list, self.reporter) LOG.info("Loaded datasource %s - %s", dsname, ds) - self.datasource = ds + self.datasource = ds # type: sources.DataSource # Ensure we adjust our path members datasource # now that we have one (thus allowing ipath to be used) self._reset() diff --git a/doc/rtd/topics/cli.rst b/doc/rtd/topics/cli.rst index 0ff230b5..b6115ed6 100644 --- a/doc/rtd/topics/cli.rst +++ 
b/doc/rtd/topics/cli.rst @@ -119,6 +119,10 @@ Current subcommands: schema errors locally without the need for deployment. Schema validation is work in progress and supports a subset of cloud-config modules. + * ``hotplug-hook``: respond to newly added system devices by retrieving + updated system metadata and bringing up/down the corresponding device. + This command is intended to be called via a systemd service and is + not considered user-accessible except for debugging purposes. .. _cli_features: diff --git a/doc/rtd/topics/events.rst b/doc/rtd/topics/events.rst index 463208cc..984e7577 100644 --- a/doc/rtd/topics/events.rst +++ b/doc/rtd/topics/events.rst @@ -20,11 +20,11 @@ event types: boot: once during Local stage, then again in Network stage. As this behavior was previously the default behavior, this option exists to prevent regressing such behavior. +- **HOTPLUG**: Dynamic add of a system device Future work will likely include infrastructure and support for the following events: -- **HOTPLUG**: Dynamic add of a system device - **METADATA_CHANGE**: An instance's metadata has change - **USER_REQUEST**: Directed request to update @@ -64,6 +64,12 @@ arbitrary values can be used. Each ``scope`` requires a ``when`` element to specify which events are to allowed to be handled. +Hotplug +======= +When the hotplug event is supported by the data source and configured in +user data, cloud-init will respond to the addition or removal of network +interfaces to the system. In addition to fetching and updating the system +metadata, cloud-init will also bring up/down the newly added interface. Examples ======== @@ -77,7 +83,7 @@ On every boot, apply network configuration found in the datasource. # apply network config on every boot updates: network: - when: ['boot'] + when: ['boot', 'hotplug'] .. _Cloud-init: https://launchpad.net/cloud-init .. 
vi: textwidth=78 diff --git a/packages/redhat/cloud-init.spec.in b/packages/redhat/cloud-init.spec.in index 16138012..b930709b 100644 --- a/packages/redhat/cloud-init.spec.in +++ b/packages/redhat/cloud-init.spec.in @@ -119,6 +119,12 @@ version_pys=$(cd "$RPM_BUILD_ROOT" && find . -name version.py -type f) ( cd "$RPM_BUILD_ROOT" && sed -i "s,@@PACKAGED_VERSION@@,%{version}-%{release}," $version_pys ) +# patch hotplug /usr/libexec script path +hotplug_file=$(cd "$RPM_BUILD_ROOT" && find . -name 10-cloud-init-hook-hotplug.rules -type f) + +( cd "$RPM_BUILD_ROOT" && + sed -i "s,/usr/lib,%{_libexecdir}," $hotplug_file ) + %clean rm -rf $RPM_BUILD_ROOT @@ -172,6 +178,7 @@ fi %files /lib/udev/rules.d/66-azure-ephemeral.rules +/lib/udev/rules.d/10-cloud-init-hook-hotplug.rules %if "%{init_system}" == "systemd" /usr/lib/systemd/system-generators/cloud-init-generator @@ -128,6 +128,7 @@ INITSYS_FILES = { 'systemd': [render_tmpl(f) for f in (glob('systemd/*.tmpl') + glob('systemd/*.service') + + glob('systemd/*.socket') + glob('systemd/*.target')) if (is_f(f) and not is_generator(f))], 'systemd.generators': [ @@ -249,6 +250,7 @@ data_files = [ (ETC + '/cloud/cloud.cfg.d', glob('config/cloud.cfg.d/*')), (ETC + '/cloud/templates', glob('templates/*')), (USR_LIB_EXEC + '/cloud-init', ['tools/ds-identify', + 'tools/hook-hotplug', 'tools/uncloud-init', 'tools/write-ssh-key-fingerprints']), (USR + '/share/bash-completion/completions', diff --git a/systemd/cloud-init-generator.tmpl b/systemd/cloud-init-generator.tmpl index 0713db16..0713db16 100755..100644 --- a/systemd/cloud-init-generator.tmpl +++ b/systemd/cloud-init-generator.tmpl diff --git a/systemd/cloud-init-hotplugd.service b/systemd/cloud-init-hotplugd.service new file mode 100644 index 00000000..b64632ef --- /dev/null +++ b/systemd/cloud-init-hotplugd.service @@ -0,0 +1,22 @@ +# Paired with cloud-init-hotplugd.socket to read from the FIFO +# /run/cloud-init/hook-hotplug-cmd which is created during a udev network +# add 
or remove event as processed by 10-cloud-init-hook-hotplug.rules. + +# On start, read args from the FIFO, process and provide structured arguments +# to `cloud-init devel hotplug-hook` which will setup or teardown network +# devices as configured by user-data. + +# Known bug with an enforcing SELinux policy: LP: #1936229 +# cloud-init-hotplud.service will read args from file descriptor 3 + +[Unit] +Description=cloud-init hotplug hook daemon +After=cloud-init-hotplugd.socket + +[Service] +Type=simple +ExecStart=/bin/bash -c 'read args <&3; echo "args=$args"; \ + exec /usr/bin/cloud-init devel hotplug-hook $args; \ + exit 0' +SyslogIdentifier=cloud-init-hotplugd +TimeoutStopSec=5 diff --git a/systemd/cloud-init-hotplugd.socket b/systemd/cloud-init-hotplugd.socket new file mode 100644 index 00000000..aa093016 --- /dev/null +++ b/systemd/cloud-init-hotplugd.socket @@ -0,0 +1,13 @@ +# cloud-init-hotplugd.socket listens on the FIFO file +# /run/cloud-init/hook-hotplug-cmd which is created during a udev network +# add or remove event as processed by 10-cloud-init-hook-hotplug.rules. 
+ +# Known bug with an enforcing SELinux policy: LP: #1936229 +[Unit] +Description=cloud-init hotplug hook socket + +[Socket] +ListenFIFO=/run/cloud-init/hook-hotplug-cmd + +[Install] +WantedBy=cloud-init.target diff --git a/tests/integration_tests/modules/test_hotplug.py b/tests/integration_tests/modules/test_hotplug.py new file mode 100644 index 00000000..b683566f --- /dev/null +++ b/tests/integration_tests/modules/test_hotplug.py @@ -0,0 +1,94 @@ +import pytest +import time +import yaml +from collections import namedtuple + +from tests.integration_tests.instances import IntegrationInstance + +USER_DATA = """\ +#cloud-config +updates: + network: + when: ['hotplug'] +""" + +ip_addr = namedtuple('ip_addr', 'interface state ip4 ip6') + + +def _wait_till_hotplug_complete(client, expected_runs=1): + for _ in range(60): + log = client.read_from_file('/var/log/cloud-init.log') + if log.count('Exiting hotplug handler') == expected_runs: + return log + time.sleep(1) + raise Exception('Waiting for hotplug handler failed') + + +def _get_ip_addr(client): + ips = [] + lines = client.execute('ip --brief addr').split('\n') + for line in lines: + attributes = line.split() + interface, state = attributes[0], attributes[1] + ip4_cidr = attributes[2] if len(attributes) > 2 else None + ip6_cidr = attributes[3] if len(attributes) > 3 else None + ip4 = ip4_cidr.split('/')[0] if ip4_cidr else None + ip6 = ip6_cidr.split('/')[0] if ip6_cidr else None + ip = ip_addr(interface, state, ip4, ip6) + ips.append(ip) + return ips + + +@pytest.mark.openstack +@pytest.mark.user_data(USER_DATA) +def test_hotplug_add_remove(client: IntegrationInstance): + ips_before = _get_ip_addr(client) + log = client.read_from_file('/var/log/cloud-init.log') + assert 'Exiting hotplug handler' not in log + + # Add new NIC + added_ip = client.instance.add_network_interface() + _wait_till_hotplug_complete(client) + ips_after_add = _get_ip_addr(client) + new_addition = [ip for ip in ips_after_add if ip.ip4 == 
added_ip][0] + + assert len(ips_after_add) == len(ips_before) + 1 + assert added_ip not in [ip.ip4 for ip in ips_before] + assert added_ip in [ip.ip4 for ip in ips_after_add] + assert new_addition.state == 'UP' + + netplan_cfg = client.read_from_file('/etc/netplan/50-cloud-init.yaml') + config = yaml.safe_load(netplan_cfg) + assert new_addition.interface in config['network']['ethernets'] + + # Remove new NIC + client.instance.remove_network_interface(added_ip) + _wait_till_hotplug_complete(client, expected_runs=2) + ips_after_remove = _get_ip_addr(client) + assert len(ips_after_remove) == len(ips_before) + assert added_ip not in [ip.ip4 for ip in ips_after_remove] + + netplan_cfg = client.read_from_file('/etc/netplan/50-cloud-init.yaml') + config = yaml.safe_load(netplan_cfg) + assert new_addition.interface not in config['network']['ethernets'] + + +@pytest.mark.openstack +def test_no_hotplug_in_userdata(client: IntegrationInstance): + ips_before = _get_ip_addr(client) + log = client.read_from_file('/var/log/cloud-init.log') + assert 'Exiting hotplug handler' not in log + + # Add new NIC + client.instance.add_network_interface() + _wait_till_hotplug_complete(client) + log = client.read_from_file('/var/log/cloud-init.log') + assert 'hotplug not enabled for event of type network' in log + + ips_after_add = _get_ip_addr(client) + if len(ips_after_add) == len(ips_before) + 1: + # We can see the device, but it should not have been brought up + new_ip = [ip for ip in ips_after_add if ip not in ips_before][0] + assert new_ip.state == 'DOWN' + else: + assert len(ips_after_add) == len(ips_before) diff --git a/tests/unittests/cmd/devel/test_hotplug_hook.py b/tests/unittests/cmd/devel/test_hotplug_hook.py new file mode 100644 index 00000000..63d2490e --- /dev/null +++ b/tests/unittests/cmd/devel/test_hotplug_hook.py @@ -0,0 +1,218 @@ +import pytest +from collections import namedtuple +from unittest import mock +from unittest.mock import call + +from 
cloudinit.cmd.devel.hotplug_hook import handle_hotplug +from cloudinit.distros import Distro +from cloudinit.event import EventType +from cloudinit.net.activators import NetworkActivator +from cloudinit.net.network_state import NetworkState +from cloudinit.sources import DataSource +from cloudinit.stages import Init + + +hotplug_args = namedtuple('hotplug_args', 'udevaction, subsystem, devpath') +FAKE_MAC = '11:22:33:44:55:66' + + +@pytest.yield_fixture +def mocks(): + m_init = mock.MagicMock(spec=Init) + m_distro = mock.MagicMock(spec=Distro) + m_datasource = mock.MagicMock(spec=DataSource) + m_datasource.distro = m_distro + m_init.datasource = m_datasource + m_init.fetch.return_value = m_datasource + + read_sys_net = mock.patch( + 'cloudinit.cmd.devel.hotplug_hook.read_sys_net_safe', + return_value=FAKE_MAC + ) + + m_network_state = mock.MagicMock(spec=NetworkState) + parse_net = mock.patch( + 'cloudinit.cmd.devel.hotplug_hook.parse_net_config_data', + return_value=m_network_state + ) + + m_activator = mock.MagicMock(spec=NetworkActivator) + select_activator = mock.patch( + 'cloudinit.cmd.devel.hotplug_hook.activators.select_activator', + return_value=m_activator + ) + + sleep = mock.patch('time.sleep') + + read_sys_net.start() + parse_net.start() + select_activator.start() + m_sleep = sleep.start() + + yield namedtuple('mocks', 'm_init m_network_state m_activator m_sleep')( + m_init=m_init, + m_network_state=m_network_state, + m_activator=m_activator, + m_sleep=m_sleep, + ) + + read_sys_net.stop() + parse_net.stop() + select_activator.stop() + sleep.stop() + + +class TestUnsupportedActions: + def test_unsupported_subsystem(self, mocks): + with pytest.raises( + Exception, + match='cannot handle events for subsystem: not_real' + ): + handle_hotplug( + hotplug_init=mocks.m_init, + devpath='/dev/fake', + subsystem='not_real', + udevaction='add' + ) + + def test_unsupported_udevaction(self, mocks): + with pytest.raises(ValueError, match='Unknown action: not_real'): + 
handle_hotplug( + hotplug_init=mocks.m_init, + devpath='/dev/fake', + udevaction='not_real', + subsystem='net' + ) + + +class TestHotplug: + def test_succcessful_add(self, mocks): + init = mocks.m_init + mocks.m_network_state.iter_interfaces.return_value = [{ + 'mac_address': FAKE_MAC, + }] + handle_hotplug( + hotplug_init=init, + devpath='/dev/fake', + udevaction='add', + subsystem='net' + ) + init.datasource.update_metadata_if_supported.assert_called_once_with([ + EventType.HOTPLUG + ]) + mocks.m_activator.bring_up_interface.assert_called_once_with('fake') + mocks.m_activator.bring_down_interface.assert_not_called() + init._write_to_cache.assert_called_once_with() + + def test_successful_remove(self, mocks): + init = mocks.m_init + mocks.m_network_state.iter_interfaces.return_value = [{}] + handle_hotplug( + hotplug_init=init, + devpath='/dev/fake', + udevaction='remove', + subsystem='net' + ) + init.datasource.update_metadata_if_supported.assert_called_once_with([ + EventType.HOTPLUG + ]) + mocks.m_activator.bring_down_interface.assert_called_once_with('fake') + mocks.m_activator.bring_up_interface.assert_not_called() + init._write_to_cache.assert_called_once_with() + + def test_update_event_disabled(self, mocks, caplog): + init = mocks.m_init + init.update_event_enabled.return_value = False + handle_hotplug( + hotplug_init=init, + devpath='/dev/fake', + udevaction='remove', + subsystem='net' + ) + assert 'hotplug not enabled for event of type' in caplog.text + init.datasource.update_metadata_if_supported.assert_not_called() + mocks.m_activator.bring_up_interface.assert_not_called() + mocks.m_activator.bring_down_interface.assert_not_called() + init._write_to_cache.assert_not_called() + + def test_update_metadata_failed(self, mocks): + mocks.m_init.datasource.update_metadata_if_supported.return_value = \ + False + with pytest.raises( + RuntimeError, match='Datasource .* not updated for event hotplug' + ): + handle_hotplug( + hotplug_init=mocks.m_init, + 
devpath='/dev/fake', + udevaction='remove', + subsystem='net' + ) + + def test_detect_hotplugged_device_not_detected_on_add(self, mocks): + mocks.m_network_state.iter_interfaces.return_value = [{}] + with pytest.raises( + RuntimeError, + match='Failed to detect {} in updated metadata'.format(FAKE_MAC) + ): + handle_hotplug( + hotplug_init=mocks.m_init, + devpath='/dev/fake', + udevaction='add', + subsystem='net' + ) + + def test_detect_hotplugged_device_detected_on_remove(self, mocks): + mocks.m_network_state.iter_interfaces.return_value = [{ + 'mac_address': FAKE_MAC, + }] + with pytest.raises( + RuntimeError, + match='Failed to detect .* in updated metadata' + ): + handle_hotplug( + hotplug_init=mocks.m_init, + devpath='/dev/fake', + udevaction='remove', + subsystem='net' + ) + + def test_apply_failed_on_add(self, mocks): + mocks.m_network_state.iter_interfaces.return_value = [{ + 'mac_address': FAKE_MAC, + }] + mocks.m_activator.bring_up_interface.return_value = False + with pytest.raises( + RuntimeError, match='Failed to bring up device: /dev/fake' + ): + handle_hotplug( + hotplug_init=mocks.m_init, + devpath='/dev/fake', + udevaction='add', + subsystem='net' + ) + + def test_apply_failed_on_remove(self, mocks): + mocks.m_network_state.iter_interfaces.return_value = [{}] + mocks.m_activator.bring_down_interface.return_value = False + with pytest.raises( + RuntimeError, match='Failed to bring down device: /dev/fake' + ): + handle_hotplug( + hotplug_init=mocks.m_init, + devpath='/dev/fake', + udevaction='remove', + subsystem='net' + ) + + def test_retry(self, mocks): + with pytest.raises(RuntimeError): + handle_hotplug( + hotplug_init=mocks.m_init, + devpath='/dev/fake', + udevaction='add', + subsystem='net' + ) + assert mocks.m_sleep.call_count == 5 + assert mocks.m_sleep.call_args_list == [ + call(1), call(3), call(5), call(10), call(30) + ] diff --git a/tests/unittests/test_net_activators.py b/tests/unittests/test_net_activators.py index f11486ff..db825c35 
100644 --- a/tests/unittests/test_net_activators.py +++ b/tests/unittests/test_net_activators.py @@ -35,32 +35,8 @@ ethernets: dhcp4: true """ -IF_UP_DOWN_AVAILABLE_CALLS = [ - (('ifquery',), {'search': ['/sbin', '/usr/sbin'], 'target': None}), - (('ifup',), {'search': ['/sbin', '/usr/sbin'], 'target': None}), - (('ifdown',), {'search': ['/sbin', '/usr/sbin'], 'target': None}), -] - -IF_UP_DOWN_CALL_LIST = [ - ((['ifup', 'eth0'], ), {}), - ((['ifup', 'eth1'], ), {}), -] - -NETPLAN_AVAILABLE_CALLS = [ - (('netplan',), {'search': ['/usr/sbin', '/sbin'], 'target': None}), -] - NETPLAN_CALL_LIST = [ - ((['netplan', 'apply'], ), {'capture': True}), -] - -NETWORK_MANAGER_AVAILABLE_CALLS = [ - (('nmcli',), {'target': None}), -] - -NETWORK_MANAGER_CALL_LIST = [ - ((['nmcli', 'connection', 'up', 'eth0'], ), {}), - ((['nmcli', 'connection', 'up', 'eth1'], ), {}), + ((['netplan', 'apply'], ), {}), ] @@ -126,23 +102,54 @@ class TestSearchAndSelect: select_activator() -@pytest.mark.parametrize('activator, available_calls, expected_call_list', [ - (IfUpDownActivator, IF_UP_DOWN_AVAILABLE_CALLS, IF_UP_DOWN_CALL_LIST), - (NetplanActivator, NETPLAN_AVAILABLE_CALLS, NETPLAN_CALL_LIST), - (NetworkManagerActivator, NETWORK_MANAGER_AVAILABLE_CALLS, - NETWORK_MANAGER_CALL_LIST), +IF_UP_DOWN_AVAILABLE_CALLS = [ + (('ifquery',), {'search': ['/sbin', '/usr/sbin'], 'target': None}), + (('ifup',), {'search': ['/sbin', '/usr/sbin'], 'target': None}), + (('ifdown',), {'search': ['/sbin', '/usr/sbin'], 'target': None}), +] + +NETPLAN_AVAILABLE_CALLS = [ + (('netplan',), {'search': ['/usr/sbin', '/sbin'], 'target': None}), +] + +NETWORK_MANAGER_AVAILABLE_CALLS = [ + (('nmcli',), {'target': None}), +] + + +@pytest.mark.parametrize('activator, available_calls', [ + (IfUpDownActivator, IF_UP_DOWN_AVAILABLE_CALLS), + (NetplanActivator, NETPLAN_AVAILABLE_CALLS), + (NetworkManagerActivator, NETWORK_MANAGER_AVAILABLE_CALLS), ]) -class TestIfUpDownActivator: +class TestActivatorsAvailable: def 
test_available( - self, activator, available_calls, expected_call_list, available_mocks + self, activator, available_calls, available_mocks ): activator.available() assert available_mocks.m_which.call_args_list == available_calls + +IF_UP_DOWN_BRING_UP_CALL_LIST = [ + ((['ifup', 'eth0'], ), {}), + ((['ifup', 'eth1'], ), {}), +] + +NETWORK_MANAGER_BRING_UP_CALL_LIST = [ + ((['nmcli', 'connection', 'up', 'ifname', 'eth0'], ), {}), + ((['nmcli', 'connection', 'up', 'ifname', 'eth1'], ), {}), +] + + +@pytest.mark.parametrize('activator, expected_call_list', [ + (IfUpDownActivator, IF_UP_DOWN_BRING_UP_CALL_LIST), + (NetplanActivator, NETPLAN_CALL_LIST), + (NetworkManagerActivator, NETWORK_MANAGER_BRING_UP_CALL_LIST), +]) +class TestActivatorsBringUp: @patch('cloudinit.subp.subp', return_value=('', '')) def test_bring_up_interface( - self, m_subp, activator, available_calls, expected_call_list, - available_mocks + self, m_subp, activator, expected_call_list, available_mocks ): activator.bring_up_interface('eth0') assert len(m_subp.call_args_list) == 1 @@ -150,16 +157,14 @@ class TestIfUpDownActivator: @patch('cloudinit.subp.subp', return_value=('', '')) def test_bring_up_interfaces( - self, m_subp, activator, available_calls, expected_call_list, - available_mocks + self, m_subp, activator, expected_call_list, available_mocks ): activator.bring_up_interfaces(['eth0', 'eth1']) assert expected_call_list == m_subp.call_args_list @patch('cloudinit.subp.subp', return_value=('', '')) def test_bring_up_all_interfaces_v1( - self, m_subp, activator, available_calls, expected_call_list, - available_mocks + self, m_subp, activator, expected_call_list, available_mocks ): network_state = parse_net_config_data(load(V1_CONFIG)) activator.bring_up_all_interfaces(network_state) @@ -168,10 +173,60 @@ class TestIfUpDownActivator: @patch('cloudinit.subp.subp', return_value=('', '')) def test_bring_up_all_interfaces_v2( - self, m_subp, activator, available_calls, expected_call_list, - 
available_mocks + self, m_subp, activator, expected_call_list, available_mocks ): network_state = parse_net_config_data(load(V2_CONFIG)) activator.bring_up_all_interfaces(network_state) for call in m_subp.call_args_list: assert call in expected_call_list + + +IF_UP_DOWN_BRING_DOWN_CALL_LIST = [ + ((['ifdown', 'eth0'], ), {}), + ((['ifdown', 'eth1'], ), {}), +] + +NETWORK_MANAGER_BRING_DOWN_CALL_LIST = [ + ((['nmcli', 'connection', 'down', 'eth0'], ), {}), + ((['nmcli', 'connection', 'down', 'eth1'], ), {}), +] + + +@pytest.mark.parametrize('activator, expected_call_list', [ + (IfUpDownActivator, IF_UP_DOWN_BRING_DOWN_CALL_LIST), + (NetplanActivator, NETPLAN_CALL_LIST), + (NetworkManagerActivator, NETWORK_MANAGER_BRING_DOWN_CALL_LIST), +]) +class TestActivatorsBringDown: + @patch('cloudinit.subp.subp', return_value=('', '')) + def test_bring_down_interface( + self, m_subp, activator, expected_call_list, available_mocks + ): + activator.bring_down_interface('eth0') + assert len(m_subp.call_args_list) == 1 + assert m_subp.call_args_list[0] == expected_call_list[0] + + @patch('cloudinit.subp.subp', return_value=('', '')) + def test_bring_down_interfaces( + self, m_subp, activator, expected_call_list, available_mocks + ): + activator.bring_down_interfaces(['eth0', 'eth1']) + assert expected_call_list == m_subp.call_args_list + + @patch('cloudinit.subp.subp', return_value=('', '')) + def test_bring_down_all_interfaces_v1( + self, m_subp, activator, expected_call_list, available_mocks + ): + network_state = parse_net_config_data(load(V1_CONFIG)) + activator.bring_down_all_interfaces(network_state) + for call in m_subp.call_args_list: + assert call in expected_call_list + + @patch('cloudinit.subp.subp', return_value=('', '')) + def test_bring_down_all_interfaces_v2( + self, m_subp, activator, expected_call_list, available_mocks + ): + network_state = parse_net_config_data(load(V2_CONFIG)) + activator.bring_down_all_interfaces(network_state) + for call in 
m_subp.call_args_list: + assert call in expected_call_list diff --git a/tools/hook-hotplug b/tools/hook-hotplug new file mode 100755 index 00000000..34e95929 --- /dev/null +++ b/tools/hook-hotplug @@ -0,0 +1,21 @@ +#!/bin/bash +# This file is part of cloud-init. See LICENSE file for license information. + +# This script checks if cloud-init has finished; if so, it writes the udev +# event parameters to the fifo read by cloud-init's hotplug-hook service + +is_finished() { + [ -e /run/cloud-init/result.json ] +} + +if is_finished; then + # open cloud-init's hotplug-hook fifo rw + exec 3<>/run/cloud-init/hook-hotplug-cmd + env_params=( + --devpath="${DEVPATH}" + --subsystem="${SUBSYSTEM}" + --udevaction="${ACTION}" + ) + # write params to cloud-init's hotplug-hook fifo + echo "${env_params[@]}" >&3 +fi diff --git a/udev/10-cloud-init-hook-hotplug.rules b/udev/10-cloud-init-hook-hotplug.rules new file mode 100644 index 00000000..2e382679 --- /dev/null +++ b/udev/10-cloud-init-hook-hotplug.rules @@ -0,0 +1,6 @@ +# This file is part of cloud-init. See LICENSE file for license information. +# Handle device add and remove events only +ACTION!="add|remove", GOTO="cloudinit_end" +LABEL="cloudinit_hook" +SUBSYSTEM=="net|block", RUN+="/usr/lib/cloud-init/hook-hotplug" +LABEL="cloudinit_end" |