diff options
Diffstat (limited to 'cloudinit/distros/__init__.py')
-rwxr-xr-x | cloudinit/distros/__init__.py | 190 |
1 files changed, 167 insertions, 23 deletions
diff --git a/cloudinit/distros/__init__.py b/cloudinit/distros/__init__.py index 92598a2d..2537608f 100755 --- a/cloudinit/distros/__init__.py +++ b/cloudinit/distros/__init__.py @@ -13,6 +13,8 @@ import abc import os import re import stat +import string +import urllib.parse from io import StringIO from cloudinit import importer @@ -23,9 +25,14 @@ from cloudinit.net import network_state from cloudinit.net import renderers from cloudinit import ssh_util from cloudinit import type_utils +from cloudinit import subp from cloudinit import util +from cloudinit.features import \ + ALLOW_EC2_MIRRORS_ON_NON_AWS_INSTANCE_TYPES + from cloudinit.distros.parsers import hosts +from .networking import LinuxNetworking # Used when a cloud-config module can be run on all cloud-init distibutions. @@ -33,12 +40,13 @@ from cloudinit.distros.parsers import hosts ALL_DISTROS = 'all' OSFAMILIES = { + 'alpine': ['alpine'], + 'arch': ['arch'], 'debian': ['debian', 'ubuntu'], - 'redhat': ['amazon', 'centos', 'fedora', 'rhel'], - 'gentoo': ['gentoo'], 'freebsd': ['freebsd'], + 'gentoo': ['gentoo'], + 'redhat': ['amazon', 'centos', 'fedora', 'rhel'], 'suse': ['opensuse', 'sles'], - 'arch': ['arch'], } LOG = logging.getLogger(__name__) @@ -50,6 +58,9 @@ _EC2_AZ_RE = re.compile('^[a-z][a-z]-(?:[a-z]+-)+[0-9][a-z]$') # Default NTP Client Configurations PREFERRED_NTP_CLIENTS = ['chrony', 'systemd-timesyncd', 'ntp', 'ntpdate'] +# Letters/Digits/Hyphen characters, for use in domain name validation +LDH_ASCII_CHARS = string.ascii_letters + string.digits + "-" + class Distro(metaclass=abc.ABCMeta): @@ -61,11 +72,13 @@ class Distro(metaclass=abc.ABCMeta): init_cmd = ['service'] # systemctl, service etc renderer_configs = {} _preferred_ntp_clients = None + networking_cls = LinuxNetworking def __init__(self, name, cfg, paths): self._paths = paths self._cfg = cfg self.name = name + self.networking = self.networking_cls() @abc.abstractmethod def install_packages(self, pkglist): @@ -220,8 +233,8 @@ class Distro(metaclass=abc.ABCMeta): LOG.debug("Non-persistently setting the system hostname to %s", hostname) try: - util.subp(['hostname', hostname]) - except util.ProcessExecutionError: + subp.subp(['hostname', hostname]) + except subp.ProcessExecutionError: util.logexc(LOG, "Failed to non-persistently adjust the system " "hostname to %s", hostname) @@ -356,12 +369,12 @@ class Distro(metaclass=abc.ABCMeta): LOG.debug("Attempting to run bring up interface %s using command %s", device_name, cmd) try: - (_out, err) = util.subp(cmd) + (_out, err) = subp.subp(cmd) if len(err): LOG.warning("Running %s resulted in stderr output: %s", cmd, err) return True - except util.ProcessExecutionError: + except subp.ProcessExecutionError: util.logexc(LOG, "Running interface command %s failed", cmd) return False @@ -380,6 +393,9 @@ class Distro(metaclass=abc.ABCMeta): def add_user(self, name, **kwargs): """ Add a user to the system using standard GNU tools + + This should be overriden on distros where useradd is not desirable or + not available. """ # XXX need to make add_user idempotent somehow as we # still want to add groups or modify SSH keys on pre-existing @@ -475,7 +491,7 @@ class Distro(metaclass=abc.ABCMeta): # Run the command LOG.debug("Adding user %s", name) try: - util.subp(useradd_cmd, logstring=log_useradd_cmd) + subp.subp(useradd_cmd, logstring=log_useradd_cmd) except Exception as e: util.logexc(LOG, "Failed to create user %s", name) raise e @@ -495,7 +511,7 @@ class Distro(metaclass=abc.ABCMeta): # Run the command LOG.debug("Adding snap user %s", name) try: - (out, err) = util.subp(create_user_cmd, logstring=create_user_cmd, + (out, err) = subp.subp(create_user_cmd, logstring=create_user_cmd, capture=True) LOG.debug("snap create-user returned: %s:%s", out, err) jobj = util.load_json(out) @@ -508,9 +524,22 @@ class Distro(metaclass=abc.ABCMeta): def create_user(self, name, **kwargs): """ - Creates users for the system using the GNU passwd tools. This - will work on an GNU system. This should be overriden on - distros where useradd is not desirable or not available. + Creates or partially updates the ``name`` user in the system. + + This defers the actual user creation to ``self.add_user`` or + ``self.add_snap_user``, and most of the keys in ``kwargs`` will be + processed there if and only if the user does not already exist. + + Once the existence of the ``name`` user has been ensured, this method + then processes these keys (for both just-created and pre-existing + users): + + * ``plain_text_passwd`` + * ``hashed_passwd`` + * ``lock_passwd`` + * ``sudo`` + * ``ssh_authorized_keys`` + * ``ssh_redirect_user`` """ # Add a snap user, if requested @@ -577,20 +606,21 @@ class Distro(metaclass=abc.ABCMeta): # passwd must use short '-l' due to SLES11 lacking long form '--lock' lock_tools = (['passwd', '-l', name], ['usermod', '--lock', name]) try: - cmd = next(l for l in lock_tools if util.which(l[0])) - except StopIteration: + cmd = next(tool for tool in lock_tools if subp.which(tool[0])) + except StopIteration as e: raise RuntimeError(( "Unable to lock user account '%s'. No tools available. " - " Tried: %s.") % (name, [c[0] for c in lock_tools])) + " Tried: %s.") % (name, [c[0] for c in lock_tools]) + ) from e try: - util.subp(cmd) + subp.subp(cmd) except Exception as e: util.logexc(LOG, 'Failed to disable password for user %s', name) raise e def expire_passwd(self, user): try: - util.subp(['passwd', '--expire', user]) + subp.subp(['passwd', '--expire', user]) except Exception as e: util.logexc(LOG, "Failed to set 'expire' for %s", user) raise e @@ -606,7 +636,7 @@ class Distro(metaclass=abc.ABCMeta): cmd.append('-e') try: - util.subp(cmd, pass_string, logstring="chpasswd for %s" % user) + subp.subp(cmd, pass_string, logstring="chpasswd for %s" % user) except Exception as e: util.logexc(LOG, "Failed to set password for %s", user) raise e @@ -703,7 +733,7 @@ class Distro(metaclass=abc.ABCMeta): LOG.warning("Skipping creation of existing group '%s'", name) else: try: - util.subp(group_add_cmd) + subp.subp(group_add_cmd) LOG.info("Created new group %s", name) except Exception: util.logexc(LOG, "Failed to create group %s", name) @@ -716,10 +746,115 @@ class Distro(metaclass=abc.ABCMeta): "; user does not exist.", member, name) continue - util.subp(['usermod', '-a', '-G', name, member]) + subp.subp(['usermod', '-a', '-G', name, member]) LOG.info("Added user '%s' to group '%s'", member, name) +def _apply_hostname_transformations_to_url(url: str, transformations: list): + """ + Apply transformations to a URL's hostname, return transformed URL. + + This is a separate function because unwrapping and rewrapping only the + hostname portion of a URL is complex. + + :param url: + The URL to operate on. + :param transformations: + A list of ``(str) -> Optional[str]`` functions, which will be applied + in order to the hostname portion of the URL. If any function + (regardless of ordering) returns None, ``url`` will be returned without + any modification. + + :return: + A string whose value is ``url`` with the hostname ``transformations`` + applied, or ``None`` if ``url`` is unparseable. + """ + try: + parts = urllib.parse.urlsplit(url) + except ValueError: + # If we can't even parse the URL, we shouldn't use it for anything + return None + new_hostname = parts.hostname + if new_hostname is None: + # The URL given doesn't have a hostname component, so (a) we can't + # transform it, and (b) it won't work as a mirror; return None. + return None + + for transformation in transformations: + new_hostname = transformation(new_hostname) + if new_hostname is None: + # If a transformation returns None, that indicates we should abort + # processing and return `url` unmodified + return url + + new_netloc = new_hostname + if parts.port is not None: + new_netloc = "{}:{}".format(new_netloc, parts.port) + return urllib.parse.urlunsplit(parts._replace(netloc=new_netloc)) + + +def _sanitize_mirror_url(url: str): + """ + Given a mirror URL, replace or remove any invalid URI characters. + + This performs the following actions on the URL's hostname: + * Checks if it is an IP address, returning the URL immediately if it is + * Converts it to its IDN form (see below for details) + * Replaces any non-Letters/Digits/Hyphen (LDH) characters in it with + hyphens + * TODO: Remove any leading/trailing hyphens from each domain name label + + Before we replace any invalid domain name characters, we first need to + ensure that any valid non-ASCII characters in the hostname will not be + replaced, by ensuring the hostname is in its Internationalized domain name + (IDN) representation (see RFC 5890). This conversion has to be applied to + the whole hostname (rather than just the substitution variables), because + the Punycode algorithm used by IDNA transcodes each part of the hostname as + a whole string (rather than encoding individual characters). It cannot be + applied to the whole URL, because (a) the Punycode algorithm expects to + operate on domain names so doesn't output a valid URL, and (b) non-ASCII + characters in non-hostname parts of the URL aren't encoded via Punycode. + + To put this in RFC 5890's terminology: before we remove or replace any + characters from our domain name (which we do to ensure that each label is a + valid LDH Label), we first ensure each label is in its A-label form. + + (Note that Python's builtin idna encoding is actually IDNA2003, not + IDNA2008. This changes the specifics of how some characters are encoded to + ASCII, but doesn't affect the logic here.) + + :param url: + The URL to operate on. + + :return: + A sanitized version of the URL, which will have been IDNA encoded if + necessary, or ``None`` if the generated string is not a parseable URL. + """ + # Acceptable characters are LDH characters, plus "." to separate each label + acceptable_chars = LDH_ASCII_CHARS + "." + transformations = [ + # This is an IP address, not a hostname, so no need to apply the + # transformations + lambda hostname: None if net.is_ip_address(hostname) else hostname, + + # Encode with IDNA to get the correct characters (as `bytes`), then + # decode with ASCII so we return a `str` + lambda hostname: hostname.encode('idna').decode('ascii'), + + # Replace any unacceptable characters with "-" + lambda hostname: ''.join( + c if c in acceptable_chars else "-" for c in hostname + ), + + # Drop leading/trailing hyphens from each part of the hostname + lambda hostname: '.'.join( + part.strip('-') for part in hostname.split('.') + ), + ] + + return _apply_hostname_transformations_to_url(url, transformations) + + def _get_package_mirror_info(mirror_info, data_source=None, mirror_filter=util.search_for_mirror): # given a arch specific 'mirror_info' entry (from package_mirrors) @@ -735,7 +870,12 @@ def _get_package_mirror_info(mirror_info, data_source=None, # ec2 availability zones are named cc-direction-[0-9][a-d] (us-east-1b) # the region is us-east-1. so region = az[0:-1] if _EC2_AZ_RE.match(data_source.availability_zone): - subst['ec2_region'] = "%s" % data_source.availability_zone[0:-1] + ec2_region = data_source.availability_zone[0:-1] + + if ALLOW_EC2_MIRRORS_ON_NON_AWS_INSTANCE_TYPES: + subst['ec2_region'] = "%s" % ec2_region + elif data_source.platform_type == "ec2": + subst['ec2_region'] = "%s" % ec2_region if data_source and data_source.region: subst['region'] = data_source.region @@ -748,9 +888,13 @@ def _get_package_mirror_info(mirror_info, data_source=None, mirrors = [] for tmpl in searchlist: try: - mirrors.append(tmpl % subst) + mirror = tmpl % subst except KeyError: - pass + continue + + mirror = _sanitize_mirror_url(mirror) + if mirror is not None: + mirrors.append(mirror) found = mirror_filter(mirrors) if found: |