Diffstat (limited to 'cloudinit/sources')
24 files changed, 2006 insertions, 357 deletions
| diff --git a/cloudinit/sources/DataSourceAliYun.py b/cloudinit/sources/DataSourceAliYun.py index 22279d09..858e0827 100644 --- a/cloudinit/sources/DataSourceAliYun.py +++ b/cloudinit/sources/DataSourceAliYun.py @@ -45,7 +45,7 @@ def _is_aliyun():  def parse_public_keys(public_keys):      keys = [] -    for key_id, key_body in public_keys.items(): +    for _key_id, key_body in public_keys.items():          if isinstance(key_body, str):              keys.append(key_body.strip())          elif isinstance(key_body, list): diff --git a/cloudinit/sources/DataSourceAltCloud.py b/cloudinit/sources/DataSourceAltCloud.py index e1d0055b..8cd312d0 100644 --- a/cloudinit/sources/DataSourceAltCloud.py +++ b/cloudinit/sources/DataSourceAltCloud.py @@ -29,7 +29,6 @@ CLOUD_INFO_FILE = '/etc/sysconfig/cloud-info'  # Shell command lists  CMD_PROBE_FLOPPY = ['modprobe', 'floppy'] -CMD_UDEVADM_SETTLE = ['udevadm', 'settle', '--timeout=5']  META_DATA_NOT_SUPPORTED = {      'block-device-mapping': {}, @@ -182,29 +181,18 @@ class DataSourceAltCloud(sources.DataSource):          # modprobe floppy          try: -            cmd = CMD_PROBE_FLOPPY -            (cmd_out, _err) = util.subp(cmd) -            LOG.debug('Command: %s\nOutput%s', ' '.join(cmd), cmd_out) -        except ProcessExecutionError as _err: -            util.logexc(LOG, 'Failed command: %s\n%s', ' '.join(cmd), _err) -            return False -        except OSError as _err: -            util.logexc(LOG, 'Failed command: %s\n%s', ' '.join(cmd), _err) +            modprobe_floppy() +        except ProcessExecutionError as e: +            util.logexc(LOG, 'Failed modprobe: %s', e)              return False          floppy_dev = '/dev/fd0'          # udevadm settle for floppy device          try: -            cmd = CMD_UDEVADM_SETTLE -            cmd.append('--exit-if-exists=' + floppy_dev) -            (cmd_out, _err) = util.subp(cmd) -            LOG.debug('Command: %s\nOutput%s', ' '.join(cmd), cmd_out) -        except ProcessExecutionError as _err: -            util.logexc(LOG, 'Failed command: %s\n%s', ' '.join(cmd), _err) -            return False -        except OSError as _err: -            util.logexc(LOG, 'Failed command: %s\n%s', ' '.join(cmd), _err) +            util.udevadm_settle(exists=floppy_dev, timeout=5) +        except (ProcessExecutionError, OSError) as e: +            util.logexc(LOG, 'Failed udevadm_settle: %s\n', e)              return False          try: @@ -261,6 +249,11 @@ class DataSourceAltCloud(sources.DataSource):              return False +def modprobe_floppy(): +    out, _err = util.subp(CMD_PROBE_FLOPPY) +    LOG.debug('Command: %s\nOutput%s', ' '.join(CMD_PROBE_FLOPPY), out) + +  # Used to match classes to dependencies  # Source DataSourceAltCloud does not really depend on networking.  
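For illustration, here is a minimal standalone sketch of the probe-then-settle flow the AltCloud hunk above factors out. It uses the stdlib subprocess module because util.subp() and util.udevadm_settle() are cloud-init internals; the helper names mirror the diff, but the bodies are simplified assumptions, not the shipped implementation.

import logging
import subprocess

LOG = logging.getLogger(__name__)


def modprobe_floppy():
    # Stand-in for util.subp(CMD_PROBE_FLOPPY); raises CalledProcessError on failure.
    result = subprocess.run(['modprobe', 'floppy'], check=True,
                            capture_output=True, text=True)
    LOG.debug('Command: modprobe floppy\nOutput: %s', result.stdout)


def udevadm_settle(exists='/dev/fd0', timeout=5):
    # Stand-in for util.udevadm_settle(exists=..., timeout=...).
    subprocess.run(['udevadm', 'settle', '--timeout=%d' % timeout,
                    '--exit-if-exists=%s' % exists], check=True)


def attach_floppy_device():
    # Same control flow as the calling method after the refactor: one helper per
    # external command, one short except clause per step.
    try:
        modprobe_floppy()
    except (subprocess.CalledProcessError, OSError) as e:
        LOG.exception('Failed modprobe: %s', e)
        return False
    try:
        udevadm_settle(exists='/dev/fd0', timeout=5)
    except (subprocess.CalledProcessError, OSError) as e:
        LOG.exception('Failed udevadm settle: %s', e)
        return False
    return True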
# In the future 'dsmode' like behavior can be added to offer user diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index b66da647..629f006f 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -8,6 +8,7 @@ import base64  import contextlib  import crypt  from functools import partial +import json  import os  import os.path  import re @@ -18,6 +19,7 @@ import xml.etree.ElementTree as ET  from cloudinit import log as logging  from cloudinit import net +from cloudinit.event import EventType  from cloudinit.net.dhcp import EphemeralDHCPv4  from cloudinit import sources  from cloudinit.sources.helpers.azure import get_metadata_from_fabric @@ -49,7 +51,18 @@ DEFAULT_FS = 'ext4'  # DMI chassis-asset-tag is set static for all azure instances  AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77'  REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds" -IMDS_URL = "http://169.254.169.254/metadata/reprovisiondata" +REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready" +AGENT_SEED_DIR = '/var/lib/waagent' +IMDS_URL = "http://169.254.169.254/metadata/" + +# List of static scripts and network config artifacts created by +# stock ubuntu suported images. +UBUNTU_EXTENDED_NETWORK_SCRIPTS = [ +    '/etc/netplan/90-azure-hotplug.yaml', +    '/usr/local/sbin/ephemeral_eth.sh', +    '/etc/udev/rules.d/10-net-device-added.rules', +    '/run/network/interfaces.ephemeral.d', +]  def find_storvscid_from_sysctl_pnpinfo(sysctl_out, deviceid): @@ -108,31 +121,24 @@ def find_dev_from_busdev(camcontrol_out, busdev):      return None -def get_dev_storvsc_sysctl(): +def execute_or_debug(cmd, fail_ret=None):      try: -        sysctl_out, err = util.subp(['sysctl', 'dev.storvsc']) +        return util.subp(cmd)[0]      except util.ProcessExecutionError: -        LOG.debug("Fail to execute sysctl dev.storvsc") -        sysctl_out = "" -    return sysctl_out +        LOG.debug("Failed to execute: %s", ' '.join(cmd)) +        return fail_ret + + +def get_dev_storvsc_sysctl(): +    return execute_or_debug(["sysctl", "dev.storvsc"], fail_ret="")  def get_camcontrol_dev_bus(): -    try: -        camcontrol_b_out, err = util.subp(['camcontrol', 'devlist', '-b']) -    except util.ProcessExecutionError: -        LOG.debug("Fail to execute camcontrol devlist -b") -        return None -    return camcontrol_b_out +    return execute_or_debug(['camcontrol', 'devlist', '-b'])  def get_camcontrol_dev(): -    try: -        camcontrol_out, err = util.subp(['camcontrol', 'devlist']) -    except util.ProcessExecutionError: -        LOG.debug("Fail to execute camcontrol devlist") -        return None -    return camcontrol_out +    return execute_or_debug(['camcontrol', 'devlist'])  def get_resource_disk_on_freebsd(port_id): @@ -192,7 +198,7 @@ if util.is_FreeBSD():  BUILTIN_DS_CONFIG = {      'agent_command': AGENT_START_BUILTIN, -    'data_dir': "/var/lib/waagent", +    'data_dir': AGENT_SEED_DIR,      'set_hostname': True,      'hostname_bounce': {          'interface': DEFAULT_PRIMARY_NIC, @@ -215,6 +221,7 @@ BUILTIN_CLOUD_CONFIG = {  }  DS_CFG_PATH = ['datasource', DS_NAME] +DS_CFG_KEY_PRESERVE_NTFS = 'never_destroy_ntfs'  DEF_EPHEMERAL_LABEL = 'Temporary Storage'  # The redacted password fails to meet password complexity requirements @@ -258,6 +265,7 @@ class DataSourceAzure(sources.DataSource):      dsname = 'Azure'      _negotiated = False +    _metadata_imds = sources.UNSET      process_name = 'dhclient'      tmpps = os.popen("ps 
-Af").read() @@ -274,6 +282,8 @@ class DataSourceAzure(sources.DataSource):              BUILTIN_DS_CONFIG])          self.dhclient_lease_file = self.ds_cfg.get('dhclient_lease_file')          self._network_config = None +        # Regenerate network config new_instance boot and every boot +        self.update_events['network'].add(EventType.BOOT)      def __str__(self):          root = sources.DataSource.__str__(self) @@ -347,15 +357,17 @@ class DataSourceAzure(sources.DataSource):          metadata['public-keys'] = key_value or pubkeys_from_crt_files(fp_files)          return metadata -    def _get_data(self): +    def crawl_metadata(self): +        """Walk all instance metadata sources returning a dict on success. + +        @return: A dictionary of any metadata content for this instance. +        @raise: InvalidMetaDataException when the expected metadata service is +            unavailable, broken or disabled. +        """ +        crawled_data = {}          # azure removes/ejects the cdrom containing the ovf-env.xml          # file on reboot.  So, in order to successfully reboot we          # need to look in the datadir and consider that valid -        asset_tag = util.read_dmi_data('chassis-asset-tag') -        if asset_tag != AZURE_CHASSIS_ASSET_TAG: -            LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag) -            return False -          ddir = self.ds_cfg['data_dir']          candidates = [self.seed_dir] @@ -384,46 +396,84 @@ class DataSourceAzure(sources.DataSource):              except NonAzureDataSource:                  continue              except BrokenAzureDataSource as exc: -                raise exc +                msg = 'BrokenAzureDataSource: %s' % exc +                raise sources.InvalidMetaDataException(msg)              except util.MountFailedError:                  LOG.warning("%s was not mountable", cdev)                  continue              if reprovision or self._should_reprovision(ret):                  ret = self._reprovision() -            (md, self.userdata_raw, cfg, files) = ret +            imds_md = get_metadata_from_imds( +                self.fallback_interface, retries=3) +            (md, userdata_raw, cfg, files) = ret              self.seed = cdev -            self.metadata = util.mergemanydict([md, DEFAULT_METADATA]) -            self.cfg = util.mergemanydict([cfg, BUILTIN_CLOUD_CONFIG]) +            crawled_data.update({ +                'cfg': cfg, +                'files': files, +                'metadata': util.mergemanydict( +                    [md, {'imds': imds_md}]), +                'userdata_raw': userdata_raw})              found = cdev              LOG.debug("found datasource in %s", cdev)              break          if not found: -            return False +            raise sources.InvalidMetaDataException('No Azure metadata found')          if found == ddir:              LOG.debug("using files cached in %s", ddir) -        # azure / hyper-v provides random data here -        # TODO. 
find the seed on FreeBSD platform -        # now update ds_cfg to reflect contents pass in config -        if not util.is_FreeBSD(): -            seed = util.load_file("/sys/firmware/acpi/tables/OEM0", -                                  quiet=True, decode=False) -            if seed: -                self.metadata['random_seed'] = seed +        seed = _get_random_seed() +        if seed: +            crawled_data['metadata']['random_seed'] = seed +        crawled_data['metadata']['instance-id'] = util.read_dmi_data( +            'system-uuid') +        return crawled_data + +    def _is_platform_viable(self): +        """Check platform environment to report if this datasource may run.""" +        return _is_platform_viable(self.seed_dir) + +    def clear_cached_attrs(self, attr_defaults=()): +        """Reset any cached class attributes to defaults.""" +        super(DataSourceAzure, self).clear_cached_attrs(attr_defaults) +        self._metadata_imds = sources.UNSET + +    def _get_data(self): +        """Crawl and process datasource metadata caching metadata as attrs. + +        @return: True on success, False on error, invalid or disabled +            datasource. +        """ +        if not self._is_platform_viable(): +            return False +        try: +            crawled_data = util.log_time( +                        logfunc=LOG.debug, msg='Crawl of metadata service', +                        func=self.crawl_metadata) +        except sources.InvalidMetaDataException as e: +            LOG.warning('Could not crawl Azure metadata: %s', e) +            return False +        if self.distro and self.distro.name == 'ubuntu': +            maybe_remove_ubuntu_network_config_scripts() + +        # Process crawled data and augment with various config defaults +        self.cfg = util.mergemanydict( +            [crawled_data['cfg'], BUILTIN_CLOUD_CONFIG]) +        self._metadata_imds = crawled_data['metadata']['imds'] +        self.metadata = util.mergemanydict( +            [crawled_data['metadata'], DEFAULT_METADATA]) +        self.userdata_raw = crawled_data['userdata_raw']          user_ds_cfg = util.get_cfg_by_path(self.cfg, DS_CFG_PATH, {})          self.ds_cfg = util.mergemanydict([user_ds_cfg, self.ds_cfg])          # walinux agent writes files world readable, but expects          # the directory to be protected. -        write_files(ddir, files, dirmode=0o700) - -        self.metadata['instance-id'] = util.read_dmi_data('system-uuid') - +        write_files( +            self.ds_cfg['data_dir'], crawled_data['files'], dirmode=0o700)          return True      def device_name_to_device(self, name): @@ -449,11 +499,12 @@ class DataSourceAzure(sources.DataSource):              LOG.debug("negotiating already done for %s",                        self.get_instance_id()) -    def _poll_imds(self, report_ready=True): +    def _poll_imds(self):          """Poll IMDS for the new provisioning data until we get a valid          response. Then return the returned JSON object.""" -        url = IMDS_URL + "?api-version=2017-04-02" +        url = IMDS_URL + "reprovisiondata?api-version=2017-04-02"          headers = {"Metadata": "true"} +        report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE))          LOG.debug("Start polling IMDS")          def exc_cb(msg, exception): @@ -463,13 +514,17 @@ class DataSourceAzure(sources.DataSource):              # call DHCP and setup the ephemeral network to acquire the new IP.              
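As a rough sketch of the reported-ready marker logic the _poll_imds() change introduces: ready is reported to the fabric only while the marker file is absent, and the marker records the reporting pid and time so later passes through the polling loop skip the report. The path and the report callback below are placeholders for this example, not the real cloud-init helpers.

import os
from time import time

REPORTED_READY_MARKER_FILE = '/tmp/example_reported_ready'  # diff uses /var/lib/cloud/data/reported_ready


def poll_imds_once(report_ready_cb):
    # Report ready only if no marker exists yet, then persist the marker so the
    # next pass does not report again.
    report_ready = not os.path.isfile(REPORTED_READY_MARKER_FILE)
    if report_ready:
        with open(REPORTED_READY_MARKER_FILE, 'w') as stream:
            stream.write('{pid}: {time}\n'.format(pid=os.getpid(), time=time()))
        report_ready_cb()


poll_imds_once(lambda: print('reported ready to the Azure fabric'))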
return False -        need_report = report_ready          while True:              try:                  with EphemeralDHCPv4() as lease: -                    if need_report: +                    if report_ready: +                        path = REPORTED_READY_MARKER_FILE +                        LOG.info( +                            "Creating a marker file to report ready: %s", path) +                        util.write_file(path, "{pid}: {time}\n".format( +                            pid=os.getpid(), time=time()))                          self._report_ready(lease=lease) -                        need_report = False +                        report_ready = False                      return readurl(url, timeout=1, headers=headers,                                     exception_cb=exc_cb, infinite=True).contents              except UrlError: @@ -480,7 +535,7 @@ class DataSourceAzure(sources.DataSource):             before we go into our polling loop."""          try:              get_metadata_from_fabric(None, lease['unknown-245']) -        except Exception as exc: +        except Exception:              LOG.warning(                  "Error communicating with Azure fabric; You may experience."                  "connectivity issues.", exc_info=True) @@ -498,13 +553,15 @@ class DataSourceAzure(sources.DataSource):          jump back into the polling loop in order to retrieve the ovf_env."""          if not ret:              return False -        (md, self.userdata_raw, cfg, files) = ret +        (_md, _userdata_raw, cfg, _files) = ret          path = REPROVISION_MARKER_FILE          if (cfg.get('PreprovisionedVm') is True or                  os.path.isfile(path)):              if not os.path.isfile(path): -                LOG.info("Creating a marker file to poll imds") -                util.write_file(path, "%s: %s\n" % (os.getpid(), time())) +                LOG.info("Creating a marker file to poll imds: %s", +                         path) +                util.write_file(path, "{pid}: {time}\n".format( +                    pid=os.getpid(), time=time()))              return True          return False @@ -534,37 +591,33 @@ class DataSourceAzure(sources.DataSource):                    self.ds_cfg['agent_command'])          try:              fabric_data = metadata_func() -        except Exception as exc: +        except Exception:              LOG.warning(                  "Error communicating with Azure fabric; You may experience."                  "connectivity issues.", exc_info=True)              return False +        util.del_file(REPORTED_READY_MARKER_FILE)          util.del_file(REPROVISION_MARKER_FILE)          return fabric_data      def activate(self, cfg, is_new_instance): -        address_ephemeral_resize(is_new_instance=is_new_instance) +        address_ephemeral_resize(is_new_instance=is_new_instance, +                                 preserve_ntfs=self.ds_cfg.get( +                                     DS_CFG_KEY_PRESERVE_NTFS, False))          return      @property      def network_config(self):          """Generate a network config like net.generate_fallback_network() with -           the following execptions. +           the following exceptions.             1. Probe the drivers of the net-devices present and inject them in                the network configuration under params: driver: <driver> value             2. Generate a fallback network config that does not include any of                the blacklisted devices.          
""" -        blacklist = ['mlx4_core']          if not self._network_config: -            LOG.debug('Azure: generating fallback configuration') -            # generate a network config, blacklist picking any mlx4_core devs -            netconfig = net.generate_fallback_config( -                blacklist_drivers=blacklist, config_driver=True) - -            self._network_config = netconfig - +            self._network_config = parse_network_config(self._metadata_imds)          return self._network_config @@ -587,17 +640,29 @@ def _has_ntfs_filesystem(devpath):      return os.path.realpath(devpath) in ntfs_devices -def can_dev_be_reformatted(devpath): -    """Determine if block device devpath is newly formatted ephemeral. +def can_dev_be_reformatted(devpath, preserve_ntfs): +    """Determine if the ephemeral drive at devpath should be reformatted. -    A newly formatted disk will: +    A fresh ephemeral disk is formatted by Azure and will:        a.) have a partition table (dos or gpt)        b.) have 1 partition that is ntfs formatted, or            have 2 partitions with the second partition ntfs formatted.            (larger instances with >2TB ephemeral disk have gpt, and will             have a microsoft reserved partition as part 1.  LP: #1686514)        c.) the ntfs partition will have no files other than possibly -          'dataloss_warning_readme.txt'""" +          'dataloss_warning_readme.txt' + +    User can indicate that NTFS should never be destroyed by setting +    DS_CFG_KEY_PRESERVE_NTFS in dscfg. +    If data is found on NTFS, user is warned to set DS_CFG_KEY_PRESERVE_NTFS +    to make sure cloud-init does not accidentally wipe their data. +    If cloud-init cannot mount the disk to check for data, destruction +    will be allowed, unless the dscfg key is set.""" +    if preserve_ntfs: +        msg = ('config says to never destroy NTFS (%s.%s), skipping checks' % +               (".".join(DS_CFG_PATH), DS_CFG_KEY_PRESERVE_NTFS)) +        return False, msg +      if not os.path.exists(devpath):          return False, 'device %s does not exist' % devpath @@ -630,18 +695,27 @@ def can_dev_be_reformatted(devpath):      bmsg = ('partition %s (%s) on device %s was ntfs formatted' %              (cand_part, cand_path, devpath))      try: -        file_count = util.mount_cb(cand_path, count_files) +        file_count = util.mount_cb(cand_path, count_files, mtype="ntfs", +                                   update_env_for_mount={'LANG': 'C'})      except util.MountFailedError as e: +        if "mount: unknown filesystem type 'ntfs'" in str(e): +            return True, (bmsg + ' but this system cannot mount NTFS,' +                          ' assuming there are no important files.' +                          ' Formatting allowed.')          return False, bmsg + ' but mount of %s failed: %s' % (cand_part, e)      if file_count != 0: +        LOG.warning("it looks like you're using NTFS on the ephemeral disk, " +                    'to ensure that filesystem does not get wiped, set ' +                    '%s.%s in config', '.'.join(DS_CFG_PATH), +                    DS_CFG_KEY_PRESERVE_NTFS)          return False, bmsg + ' but had %d files on it.' % file_count      return True, bmsg + ' and had no important files. Safe for reformatting.'  
def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120, -                             is_new_instance=False): +                             is_new_instance=False, preserve_ntfs=False):      # wait for ephemeral disk to come up      naplen = .2      missing = util.wait_for_files([devpath], maxwait=maxwait, naplen=naplen, @@ -657,7 +731,7 @@ def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,      if is_new_instance:          result, msg = (True, "First instance boot.")      else: -        result, msg = can_dev_be_reformatted(devpath) +        result, msg = can_dev_be_reformatted(devpath, preserve_ntfs)      LOG.debug("reformattable=%s: %s", result, msg)      if not result: @@ -971,6 +1045,18 @@ def _check_freebsd_cdrom(cdrom_dev):      return False +def _get_random_seed(): +    """Return content random seed file if available, otherwise, +       return None.""" +    # azure / hyper-v provides random data here +    # TODO. find the seed on FreeBSD platform +    # now update ds_cfg to reflect contents pass in config +    if util.is_FreeBSD(): +        return None +    return util.load_file("/sys/firmware/acpi/tables/OEM0", +                          quiet=True, decode=False) + +  def list_possible_azure_ds_devs():      devlist = []      if util.is_FreeBSD(): @@ -998,6 +1084,151 @@ def load_azure_ds_dir(source_dir):      return (md, ud, cfg, {'ovf-env.xml': contents}) +def parse_network_config(imds_metadata): +    """Convert imds_metadata dictionary to network v2 configuration. + +    Parses network configuration from imds metadata if present or generate +    fallback network config excluding mlx4_core devices. + +    @param: imds_metadata: Dict of content read from IMDS network service. +    @return: Dictionary containing network version 2 standard configuration. 
+    """ +    if imds_metadata != sources.UNSET and imds_metadata: +        netconfig = {'version': 2, 'ethernets': {}} +        LOG.debug('Azure: generating network configuration from IMDS') +        network_metadata = imds_metadata['network'] +        for idx, intf in enumerate(network_metadata['interface']): +            nicname = 'eth{idx}'.format(idx=idx) +            dev_config = {} +            for addr4 in intf['ipv4']['ipAddress']: +                privateIpv4 = addr4['privateIpAddress'] +                if privateIpv4: +                    if dev_config.get('dhcp4', False): +                        # Append static address config for nic > 1 +                        netPrefix = intf['ipv4']['subnet'][0].get( +                            'prefix', '24') +                        if not dev_config.get('addresses'): +                            dev_config['addresses'] = [] +                        dev_config['addresses'].append( +                            '{ip}/{prefix}'.format( +                                ip=privateIpv4, prefix=netPrefix)) +                    else: +                        dev_config['dhcp4'] = True +            for addr6 in intf['ipv6']['ipAddress']: +                privateIpv6 = addr6['privateIpAddress'] +                if privateIpv6: +                    dev_config['dhcp6'] = True +                    break +            if dev_config: +                mac = ':'.join(re.findall(r'..', intf['macAddress'])) +                dev_config.update( +                    {'match': {'macaddress': mac.lower()}, +                     'set-name': nicname}) +                netconfig['ethernets'][nicname] = dev_config +    else: +        blacklist = ['mlx4_core'] +        LOG.debug('Azure: generating fallback configuration') +        # generate a network config, blacklist picking mlx4_core devs +        netconfig = net.generate_fallback_config( +            blacklist_drivers=blacklist, config_driver=True) +    return netconfig + + +def get_metadata_from_imds(fallback_nic, retries): +    """Query Azure's network metadata service, returning a dictionary. + +    If network is not up, setup ephemeral dhcp on fallback_nic to talk to the +    IMDS. For more info on IMDS: +        https://docs.microsoft.com/en-us/azure/virtual-machines/windows/instance-metadata-service + +    @param fallback_nic: String. The name of the nic which requires active +        network in order to query IMDS. +    @param retries: The number of retries of the IMDS_URL. + +    @return: A dict of instance metadata containing compute and network +        info. 
+    """ +    kwargs = {'logfunc': LOG.debug, +              'msg': 'Crawl of Azure Instance Metadata Service (IMDS)', +              'func': _get_metadata_from_imds, 'args': (retries,)} +    if net.is_up(fallback_nic): +        return util.log_time(**kwargs) +    else: +        with EphemeralDHCPv4(fallback_nic): +            return util.log_time(**kwargs) + + +def _get_metadata_from_imds(retries): + +    def retry_on_url_error(msg, exception): +        if isinstance(exception, UrlError) and exception.code == 404: +            return True  # Continue retries +        return False  # Stop retries on all other exceptions + +    url = IMDS_URL + "instance?api-version=2017-12-01" +    headers = {"Metadata": "true"} +    try: +        response = readurl( +            url, timeout=1, headers=headers, retries=retries, +            exception_cb=retry_on_url_error) +    except Exception as e: +        LOG.debug('Ignoring IMDS instance metadata: %s', e) +        return {} +    try: +        return util.load_json(str(response)) +    except json.decoder.JSONDecodeError: +        LOG.warning( +            'Ignoring non-json IMDS instance metadata: %s', str(response)) +    return {} + + +def maybe_remove_ubuntu_network_config_scripts(paths=None): +    """Remove Azure-specific ubuntu network config for non-primary nics. + +    @param paths: List of networking scripts or directories to remove when +        present. + +    In certain supported ubuntu images, static udev rules or netplan yaml +    config is delivered in the base ubuntu image to support dhcp on any +    additional interfaces which get attached by a customer at some point +    after initial boot. Since the Azure datasource can now regenerate +    network configuration as metadata reports these new devices, we no longer +    want the udev rules or netplan's 90-azure-hotplug.yaml to configure +    networking on eth1 or greater as it might collide with cloud-init's +    configuration. + +    Remove the any existing extended network scripts if the datasource is +    enabled to write network per-boot. +    """ +    if not paths: +        paths = UBUNTU_EXTENDED_NETWORK_SCRIPTS +    logged = False +    for path in paths: +        if os.path.exists(path): +            if not logged: +                LOG.info( +                    'Removing Ubuntu extended network scripts because' +                    ' cloud-init updates Azure network configuration on the' +                    ' following event: %s.', +                    EventType.BOOT) +                logged = True +            if os.path.isdir(path): +                util.del_dir(path) +            else: +                util.del_file(path) + + +def _is_platform_viable(seed_dir): +    """Check platform environment to report if this datasource may run.""" +    asset_tag = util.read_dmi_data('chassis-asset-tag') +    if asset_tag == AZURE_CHASSIS_ASSET_TAG: +        return True +    LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag) +    if os.path.exists(os.path.join(seed_dir, 'ovf-env.xml')): +        return True +    return False + +  class BrokenAzureDataSource(Exception):      pass diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py index 0df545fc..d4b758f2 100644 --- a/cloudinit/sources/DataSourceCloudStack.py +++ b/cloudinit/sources/DataSourceCloudStack.py @@ -68,6 +68,10 @@ class DataSourceCloudStack(sources.DataSource):      dsname = 'CloudStack' +    # Setup read_url parameters per get_url_params. 
+    url_max_wait = 120 +    url_timeout = 50 +      def __init__(self, sys_cfg, distro, paths):          sources.DataSource.__init__(self, sys_cfg, distro, paths)          self.seed_dir = os.path.join(paths.seed_dir, 'cs') @@ -80,33 +84,18 @@ class DataSourceCloudStack(sources.DataSource):          self.metadata_address = "http://%s/" % (self.vr_addr,)          self.cfg = {} -    def _get_url_settings(self): -        mcfg = self.ds_cfg -        max_wait = 120 -        try: -            max_wait = int(mcfg.get("max_wait", max_wait)) -        except Exception: -            util.logexc(LOG, "Failed to get max wait. using %s", max_wait) +    def wait_for_metadata_service(self): +        url_params = self.get_url_params() -        if max_wait == 0: +        if url_params.max_wait_seconds <= 0:              return False -        timeout = 50 -        try: -            timeout = int(mcfg.get("timeout", timeout)) -        except Exception: -            util.logexc(LOG, "Failed to get timeout, using %s", timeout) - -        return (max_wait, timeout) - -    def wait_for_metadata_service(self): -        (max_wait, timeout) = self._get_url_settings() -          urls = [uhelp.combine_url(self.metadata_address,                                    'latest/meta-data/instance-id')]          start_time = time.time() -        url = uhelp.wait_for_url(urls=urls, max_wait=max_wait, -                                 timeout=timeout, status_cb=LOG.warn) +        url = uhelp.wait_for_url( +            urls=urls, max_wait=url_params.max_wait_seconds, +            timeout=url_params.timeout_seconds, status_cb=LOG.warn)          if url:              LOG.debug("Using metadata source: '%s'", url) diff --git a/cloudinit/sources/DataSourceConfigDrive.py b/cloudinit/sources/DataSourceConfigDrive.py index c7b5fe5f..664dc4b7 100644 --- a/cloudinit/sources/DataSourceConfigDrive.py +++ b/cloudinit/sources/DataSourceConfigDrive.py @@ -43,7 +43,7 @@ class DataSourceConfigDrive(openstack.SourceMixin, sources.DataSource):          self.version = None          self.ec2_metadata = None          self._network_config = None -        self.network_json = None +        self.network_json = sources.UNSET          self.network_eni = None          self.known_macs = None          self.files = {} @@ -69,7 +69,8 @@ class DataSourceConfigDrive(openstack.SourceMixin, sources.DataSource):                  util.logexc(LOG, "Failed reading config drive from %s", sdir)          if not found: -            for dev in find_candidate_devs(): +            dslist = self.sys_cfg.get('datasource_list') +            for dev in find_candidate_devs(dslist=dslist):                  try:                      # Set mtype if freebsd and turn off sync                      if dev.startswith("/dev/cd"): @@ -148,7 +149,7 @@ class DataSourceConfigDrive(openstack.SourceMixin, sources.DataSource):      @property      def network_config(self):          if self._network_config is None: -            if self.network_json is not None: +            if self.network_json not in (None, sources.UNSET):                  LOG.debug("network config provided via network_json")                  self._network_config = openstack.convert_net_json(                      self.network_json, known_macs=self.known_macs) @@ -195,7 +196,7 @@ def on_first_boot(data, distro=None, network=True):          net_conf = data.get("network_config", '')          if net_conf and distro:              LOG.warning("Updating network interfaces from config drive") -            distro.apply_network(net_conf) +    
        distro.apply_network_config(eni.convert_eni_data(net_conf))      write_injected_files(data.get('files')) @@ -211,7 +212,7 @@ def write_injected_files(files):                  util.logexc(LOG, "Failed writing file: %s", filename) -def find_candidate_devs(probe_optical=True): +def find_candidate_devs(probe_optical=True, dslist=None):      """Return a list of devices that may contain the config drive.      The returned list is sorted by search order where the first item has @@ -227,6 +228,9 @@ def find_candidate_devs(probe_optical=True):          * either vfat or iso9660 formated          * labeled with 'config-2' or 'CONFIG-2'      """ +    if dslist is None: +        dslist = [] +      # query optical drive to get it in blkid cache for 2.6 kernels      if probe_optical:          for device in OPTICAL_DEVICES: @@ -257,7 +261,8 @@ def find_candidate_devs(probe_optical=True):      devices = [d for d in candidates                 if d in by_label or not util.is_partition(d)] -    if devices: +    LOG.debug("devices=%s dslist=%s", devices, dslist) +    if devices and "IBMCloud" in dslist:          # IBMCloud uses config-2 label, but limited to a single UUID.          ibm_platform, ibm_path = get_ibm_platform()          if ibm_path in devices: diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index efb29f88..98ea7bbc 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -29,8 +29,6 @@ STRICT_ID_PATH = ("datasource", "Ec2", "strict_id")  STRICT_ID_DEFAULT = "warn"  DEFAULT_PRIMARY_NIC = 'eth0' -_unset = "_unset" -  class Platforms(object):      # TODO Rename and move to cloudinit.cloud.CloudNames @@ -67,15 +65,16 @@ class DataSourceEc2(sources.DataSource):      # for extended metadata content. IPv6 support comes in 2016-09-02      extended_metadata_versions = ['2016-09-02'] +    # Setup read_url parameters per get_url_params. +    url_max_wait = 120 +    url_timeout = 50 +      _cloud_platform = None -    _network_config = _unset  # Used for caching calculated network config v1 +    _network_config = sources.UNSET  # Used to cache calculated network cfg v1      # Whether we want to get network configuration from the metadata service. -    get_network_metadata = False - -    # Track the discovered fallback nic for use in configuration generation. -    _fallback_interface = None +    perform_dhcp_setup = False      def __init__(self, sys_cfg, distro, paths):          super(DataSourceEc2, self).__init__(sys_cfg, distro, paths) @@ -106,7 +105,7 @@ class DataSourceEc2(sources.DataSource):          elif self.cloud_platform == Platforms.NO_EC2_METADATA:              return False -        if self.get_network_metadata:  # Setup networking in init-local stage. +        if self.perform_dhcp_setup:  # Setup networking in init-local stage.              if util.is_FreeBSD():                  LOG.debug("FreeBSD doesn't support running dhclient with -sf")                  return False @@ -166,27 +165,11 @@ class DataSourceEc2(sources.DataSource):          else:              return self.metadata['instance-id'] -    def _get_url_settings(self): -        mcfg = self.ds_cfg -        max_wait = 120 -        try: -            max_wait = int(mcfg.get("max_wait", max_wait)) -        except Exception: -            util.logexc(LOG, "Failed to get max wait. 
using %s", max_wait) - -        timeout = 50 -        try: -            timeout = max(0, int(mcfg.get("timeout", timeout))) -        except Exception: -            util.logexc(LOG, "Failed to get timeout, using %s", timeout) - -        return (max_wait, timeout) -      def wait_for_metadata_service(self):          mcfg = self.ds_cfg -        (max_wait, timeout) = self._get_url_settings() -        if max_wait <= 0: +        url_params = self.get_url_params() +        if url_params.max_wait_seconds <= 0:              return False          # Remove addresses from the list that wont resolve. @@ -213,7 +196,8 @@ class DataSourceEc2(sources.DataSource):          start_time = time.time()          url = uhelp.wait_for_url( -            urls=urls, max_wait=max_wait, timeout=timeout, status_cb=LOG.warn) +            urls=urls, max_wait=url_params.max_wait_seconds, +            timeout=url_params.timeout_seconds, status_cb=LOG.warn)          if url:              self.metadata_address = url2base[url] @@ -318,11 +302,11 @@ class DataSourceEc2(sources.DataSource):      @property      def network_config(self):          """Return a network config dict for rendering ENI or netplan files.""" -        if self._network_config != _unset: +        if self._network_config != sources.UNSET:              return self._network_config          if self.metadata is None: -            # this would happen if get_data hadn't been called. leave as _unset +            # this would happen if get_data hadn't been called. leave as UNSET              LOG.warning(                  "Unexpected call to network_config when metadata is None.")              return None @@ -361,9 +345,7 @@ class DataSourceEc2(sources.DataSource):                  self._fallback_interface = _legacy_fbnic                  self.fallback_nic = None              else: -                self._fallback_interface = net.find_fallback_nic() -                if self._fallback_interface is None: -                    LOG.warning("Did not find a fallback interface on EC2.") +                return super(DataSourceEc2, self).fallback_interface          return self._fallback_interface      def _crawl_metadata(self): @@ -398,7 +380,7 @@ class DataSourceEc2Local(DataSourceEc2):      metadata service. If the metadata service provides network configuration      then render the network configuration for that instance based on metadata.      """ -    get_network_metadata = True  # Get metadata network config if present +    perform_dhcp_setup = True  # Use dhcp before querying metadata      def get_data(self):          supported_platforms = (Platforms.AWS,) diff --git a/cloudinit/sources/DataSourceIBMCloud.py b/cloudinit/sources/DataSourceIBMCloud.py index 02b3d56f..a5358148 100644 --- a/cloudinit/sources/DataSourceIBMCloud.py +++ b/cloudinit/sources/DataSourceIBMCloud.py @@ -8,17 +8,11 @@ There are 2 different api exposed launch methods.   * template: This is the legacy method of launching instances.     When booting from an image template, the system boots first into     a "provisioning" mode.  There, host <-> guest mechanisms are utilized -   to execute code in the guest and provision it. +   to execute code in the guest and configure it.  The configuration +   includes configuring the system network and possibly installing +   packages and other software stack. -   Cloud-init will disable itself when it detects that it is in the -   provisioning mode.  It detects this by the presence of -   a file '/root/provisioningConfiguration.cfg'. 
- -   When provided with user-data, the "first boot" will contain a -   ConfigDrive-like disk labeled with 'METADATA'.  If there is no user-data -   provided, then there is no data-source. - -   Cloud-init never does any network configuration in this mode. +   After the provisioning is finished, the system reboots.   * os_code: Essentially "launch by OS Code" (Operating System Code).     This is a more modern approach.  There is no specific "provisioning" boot. @@ -30,11 +24,73 @@ There are 2 different api exposed launch methods.     mean that 1 in 8^16 (~4 billion) Xen ConfigDrive systems will be     incorrectly identified as IBMCloud. +The combination of these 2 launch methods and with or without user-data +creates 6 boot scenarios. + A. os_code with user-data + B. os_code without user-data +    Cloud-init is fully operational in this mode. + +    There is a block device attached with label 'config-2'. +    As it differs from OpenStack's config-2, we have to differentiate. +    We do so by requiring the UUID on the filesystem to be "9796-932E". + +    This disk will have the following files. Specifically note, there +    is no versioned path to the meta-data, only 'latest': +      openstack/latest/meta_data.json +      openstack/latest/network_data.json +      openstack/latest/user_data [optional] +      openstack/latest/vendor_data.json + +    vendor_data.json as of 2018-04 looks like this: +      {"cloud-init":"#!/bin/bash\necho 'root:$6$<snip>' | chpasswd -e"} + +    The only difference between A and B in this mode is the presence +    of user_data on the config disk. + + C. template, provisioning boot with user-data + D. template, provisioning boot without user-data. +    With ds-identify cloud-init is fully disabled in this mode. +    Without ds-identify, cloud-init None datasource will be used. + +    This is currently identified by the presence of +    /root/provisioningConfiguration.cfg . That file is placed into the +    system before it is booted. + +    The difference between C and D is the presence of the METADATA disk +    as described in E below.  There is no METADATA disk attached unless +    user-data is provided. + + E. template, post-provisioning boot with user-data. +    Cloud-init is fully operational in this mode. + +    This is identified by a block device with filesystem label "METADATA". +    The looks similar to a version-1 OpenStack config drive.  It will +    have the following files: + +       openstack/latest/user_data +       openstack/latest/meta_data.json +       openstack/content/interfaces +       meta.js + +    meta.js contains something similar to user_data.  cloud-init ignores it. +    cloud-init ignores the 'interfaces' style file here. +    In this mode, cloud-init has networking code disabled.  It relies +    on the provisioning boot to have configured networking. + + F. template, post-provisioning boot without user-data. +    With ds-identify, cloud-init will be fully disabled. +    Without ds-identify, cloud-init None datasource will be used. + +    There is no information available to identify this scenario. + +    The user will be able to ssh in as as root with their public keys that +    have been installed into /root/ssh/.authorized_keys +    during the provisioning stage. +  TODO:   * is uuid (/sys/hypervisor/uuid) stable for life of an instance?     it seems it is not the same as data's uuid in the os_code case     but is in the template case. 
-  """  import base64  import json @@ -138,8 +194,30 @@ def _is_xen():      return os.path.exists("/proc/xen") -def _is_ibm_provisioning(): -    return os.path.exists("/root/provisioningConfiguration.cfg") +def _is_ibm_provisioning( +        prov_cfg="/root/provisioningConfiguration.cfg", +        inst_log="/root/swinstall.log", +        boot_ref="/proc/1/environ"): +    """Return boolean indicating if this boot is ibm provisioning boot.""" +    if os.path.exists(prov_cfg): +        msg = "config '%s' exists." % prov_cfg +        result = True +        if os.path.exists(inst_log): +            if os.path.exists(boot_ref): +                result = (os.stat(inst_log).st_mtime > +                          os.stat(boot_ref).st_mtime) +                msg += (" log '%s' from %s boot." % +                        (inst_log, "current" if result else "previous")) +            else: +                msg += (" log '%s' existed, but no reference file '%s'." % +                        (inst_log, boot_ref)) +                result = False +        else: +            msg += " log '%s' did not exist." % inst_log +    else: +        result, msg = (False, "config '%s' did not exist." % prov_cfg) +    LOG.debug("ibm_provisioning=%s: %s", result, msg) +    return result  def get_ibm_platform(): @@ -189,7 +267,7 @@ def get_ibm_platform():          else:              return (Platforms.TEMPLATE_LIVE_METADATA, metadata_path)      elif _is_ibm_provisioning(): -            return (Platforms.TEMPLATE_PROVISIONING_NODATA, None) +        return (Platforms.TEMPLATE_PROVISIONING_NODATA, None)      return not_found @@ -217,7 +295,7 @@ def read_md():              results = metadata_from_dir(path)          else:              results = util.mount_cb(path, metadata_from_dir) -    except BrokenMetadata as e: +    except sources.BrokenMetadata as e:          raise RuntimeError(              "Failed reading IBM config disk (platform=%s path=%s): %s" %              (platform, path, e)) @@ -226,10 +304,6 @@ def read_md():      return ret -class BrokenMetadata(IOError): -    pass - -  def metadata_from_dir(source_dir):      """Walk source_dir extracting standardized metadata. @@ -274,12 +348,13 @@ def metadata_from_dir(source_dir):              try:                  data = transl(raw)              except Exception as e: -                raise BrokenMetadata("Failed decoding %s: %s" % (path, e)) +                raise sources.BrokenMetadata( +                    "Failed decoding %s: %s" % (path, e))          results[name] = data      if results.get('metadata_raw') is None: -        raise BrokenMetadata( +        raise sources.BrokenMetadata(              "%s missing required file 'meta_data.json'" % source_dir)      results['metadata'] = {} @@ -290,7 +365,7 @@ def metadata_from_dir(source_dir):          try:              md['random_seed'] = base64.b64decode(md_raw['random_seed'])          except (ValueError, TypeError) as e: -            raise BrokenMetadata( +            raise sources.BrokenMetadata(                  "Badly formatted metadata random_seed entry: %s" % e)      renames = ( diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py index 6ac88635..bcb38544 100644 --- a/cloudinit/sources/DataSourceMAAS.py +++ b/cloudinit/sources/DataSourceMAAS.py @@ -198,13 +198,13 @@ def read_maas_seed_url(seed_url, read_file_or_url=None, timeout=None,      If version is None, then <version>/ will not be used.      
"""      if read_file_or_url is None: -        read_file_or_url = util.read_file_or_url +        read_file_or_url = url_helper.read_file_or_url      if seed_url.endswith("/"):          seed_url = seed_url[:-1]      md = {} -    for path, dictname, binary, optional in DS_FIELDS: +    for path, _dictname, binary, optional in DS_FIELDS:          if version is None:              url = "%s/%s" % (seed_url, path)          else: diff --git a/cloudinit/sources/DataSourceNoCloud.py b/cloudinit/sources/DataSourceNoCloud.py index 5d3a8ddb..2daea59d 100644 --- a/cloudinit/sources/DataSourceNoCloud.py +++ b/cloudinit/sources/DataSourceNoCloud.py @@ -78,7 +78,7 @@ class DataSourceNoCloud(sources.DataSource):                  LOG.debug("Using seeded data from %s", path)                  mydata = _merge_new_seed(mydata, seeded)                  break -            except ValueError as e: +            except ValueError:                  pass          # If the datasource config had a 'seedfrom' entry, then that takes @@ -117,7 +117,7 @@ class DataSourceNoCloud(sources.DataSource):                      try:                          seeded = util.mount_cb(dev, _pp2d_callback,                                                 pp2d_kwargs) -                    except ValueError as e: +                    except ValueError:                          if dev in label_list:                              LOG.warning("device %s with label=%s not a"                                          "valid seed.", dev, label) diff --git a/cloudinit/sources/DataSourceOVF.py b/cloudinit/sources/DataSourceOVF.py index dc914a72..178ccb0f 100644 --- a/cloudinit/sources/DataSourceOVF.py +++ b/cloudinit/sources/DataSourceOVF.py @@ -556,7 +556,7 @@ def search_file(dirpath, filename):      if not dirpath or not filename:          return None -    for root, dirs, files in os.walk(dirpath): +    for root, _dirs, files in os.walk(dirpath):          if filename in files:              return os.path.join(root, filename) diff --git a/cloudinit/sources/DataSourceOpenNebula.py b/cloudinit/sources/DataSourceOpenNebula.py index d4a41116..77ccd128 100644 --- a/cloudinit/sources/DataSourceOpenNebula.py +++ b/cloudinit/sources/DataSourceOpenNebula.py @@ -232,7 +232,7 @@ class OpenNebulaNetwork(object):              # Set IPv6 default gateway              gateway6 = self.get_gateway6(c_dev) -            if gateway: +            if gateway6:                  devconf['gateway6'] = gateway6              # Set DNS servers and search domains @@ -378,7 +378,7 @@ def read_context_disk_dir(source_dir, asuser=None):          if asuser is not None:              try:                  pwd.getpwnam(asuser) -            except KeyError as e: +            except KeyError:                  raise BrokenContextDiskDir(                      "configured user '{user}' does not exist".format(                          user=asuser)) diff --git a/cloudinit/sources/DataSourceOpenStack.py b/cloudinit/sources/DataSourceOpenStack.py index e55a7638..4a015240 100644 --- a/cloudinit/sources/DataSourceOpenStack.py +++ b/cloudinit/sources/DataSourceOpenStack.py @@ -7,11 +7,13 @@  import time  from cloudinit import log as logging +from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError  from cloudinit import sources  from cloudinit import url_helper  from cloudinit import util  from cloudinit.sources.helpers import openstack +from cloudinit.sources import DataSourceOracle as oracle  LOG = logging.getLogger(__name__) @@ -22,51 +24,37 @@ DEFAULT_METADATA = {      "instance-id": 
DEFAULT_IID,  } +# OpenStack DMI constants +DMI_PRODUCT_NOVA = 'OpenStack Nova' +DMI_PRODUCT_COMPUTE = 'OpenStack Compute' +VALID_DMI_PRODUCT_NAMES = [DMI_PRODUCT_NOVA, DMI_PRODUCT_COMPUTE] +DMI_ASSET_TAG_OPENTELEKOM = 'OpenTelekomCloud' +VALID_DMI_ASSET_TAGS = [DMI_ASSET_TAG_OPENTELEKOM] +  class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):      dsname = "OpenStack" +    _network_config = sources.UNSET  # Used to cache calculated network cfg v1 + +    # Whether we want to get network configuration from the metadata service. +    perform_dhcp_setup = False +      def __init__(self, sys_cfg, distro, paths):          super(DataSourceOpenStack, self).__init__(sys_cfg, distro, paths)          self.metadata_address = None          self.ssl_details = util.fetch_ssl_details(self.paths)          self.version = None          self.files = {} -        self.ec2_metadata = None +        self.ec2_metadata = sources.UNSET +        self.network_json = sources.UNSET      def __str__(self):          root = sources.DataSource.__str__(self)          mstr = "%s [%s,ver=%s]" % (root, self.dsmode, self.version)          return mstr -    def _get_url_settings(self): -        # TODO(harlowja): this is shared with ec2 datasource, we should just -        # move it to a shared location instead... -        # Note: the defaults here are different though. - -        # max_wait < 0 indicates do not wait -        max_wait = -1 -        timeout = 10 -        retries = 5 - -        try: -            max_wait = int(self.ds_cfg.get("max_wait", max_wait)) -        except Exception: -            util.logexc(LOG, "Failed to get max wait. using %s", max_wait) - -        try: -            timeout = max(0, int(self.ds_cfg.get("timeout", timeout))) -        except Exception: -            util.logexc(LOG, "Failed to get timeout, using %s", timeout) - -        try: -            retries = int(self.ds_cfg.get("retries", retries)) -        except Exception: -            util.logexc(LOG, "Failed to get retries. 
using %s", retries) - -        return (max_wait, timeout, retries) -      def wait_for_metadata_service(self):          urls = self.ds_cfg.get("metadata_urls", [DEF_MD_URL])          filtered = [x for x in urls if util.is_resolvable_url(x)] @@ -86,10 +74,11 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):              md_urls.append(md_url)              url2base[md_url] = url -        (max_wait, timeout, retries) = self._get_url_settings() +        url_params = self.get_url_params()          start_time = time.time() -        avail_url = url_helper.wait_for_url(urls=md_urls, max_wait=max_wait, -                                            timeout=timeout) +        avail_url = url_helper.wait_for_url( +            urls=md_urls, max_wait=url_params.max_wait_seconds, +            timeout=url_params.timeout_seconds)          if avail_url:              LOG.debug("Using metadata source: '%s'", url2base[avail_url])          else: @@ -99,38 +88,68 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):          self.metadata_address = url2base.get(avail_url)          return bool(avail_url) -    def _get_data(self): -        try: -            if not self.wait_for_metadata_service(): -                return False -        except IOError: -            return False +    def check_instance_id(self, sys_cfg): +        # quickly (local check only) if self.instance_id is still valid +        return sources.instance_id_matches_system_uuid(self.get_instance_id()) -        (max_wait, timeout, retries) = self._get_url_settings() +    @property +    def network_config(self): +        """Return a network config dict for rendering ENI or netplan files.""" +        if self._network_config != sources.UNSET: +            return self._network_config + +        # RELEASE_BLOCKER: SRU to Xenial and Artful SRU should not provide +        # network_config by default unless configured in /etc/cloud/cloud.cfg*. +        # Patch Xenial and Artful before release to default to False. +        if util.is_false(self.ds_cfg.get('apply_network_config', True)): +            self._network_config = None +            return self._network_config +        if self.network_json == sources.UNSET: +            # this would happen if get_data hadn't been called. leave as UNSET +            LOG.warning( +                'Unexpected call to network_config when network_json is None.') +            return None + +        LOG.debug('network config provided via network_json') +        self._network_config = openstack.convert_net_json( +            self.network_json, known_macs=None) +        return self._network_config -        try: -            results = util.log_time(LOG.debug, -                                    'Crawl of openstack metadata service', -                                    read_metadata_service, -                                    args=[self.metadata_address], -                                    kwargs={'ssl_details': self.ssl_details, -                                            'retries': retries, -                                            'timeout': timeout}) -        except openstack.NonReadable: -            return False -        except (openstack.BrokenMetadata, IOError): -            util.logexc(LOG, "Broken metadata address %s", -                        self.metadata_address) +    def _get_data(self): +        """Crawl metadata, parse and persist that data for this instance. + +        @return: True when metadata discovered indicates OpenStack datasource. 
+            False when unable to contact metadata service or when metadata +            format is invalid or disabled. +        """ +        oracle_considered = 'Oracle' in self.sys_cfg.get('datasource_list') +        if not detect_openstack(accept_oracle=not oracle_considered):              return False +        if self.perform_dhcp_setup:  # Setup networking in init-local stage. +            try: +                with EphemeralDHCPv4(self.fallback_interface): +                    results = util.log_time( +                        logfunc=LOG.debug, msg='Crawl of metadata service', +                        func=self._crawl_metadata) +            except (NoDHCPLeaseError, sources.InvalidMetaDataException) as e: +                util.logexc(LOG, str(e)) +                return False +        else: +            try: +                results = self._crawl_metadata() +            except sources.InvalidMetaDataException as e: +                util.logexc(LOG, str(e)) +                return False +          self.dsmode = self._determine_dsmode([results.get('dsmode')])          if self.dsmode == sources.DSMODE_DISABLED:              return False -          md = results.get('metadata', {})          md = util.mergemanydict([md, DEFAULT_METADATA])          self.metadata = md          self.ec2_metadata = results.get('ec2-metadata') +        self.network_json = results.get('networkdata')          self.userdata_raw = results.get('userdata')          self.version = results['version']          self.files.update(results.get('files', {})) @@ -145,9 +164,50 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):          return True -    def check_instance_id(self, sys_cfg): -        # quickly (local check only) if self.instance_id is still valid -        return sources.instance_id_matches_system_uuid(self.get_instance_id()) +    def _crawl_metadata(self): +        """Crawl metadata service when available. + +        @returns: Dictionary with all metadata discovered for this datasource. +        @raise: InvalidMetaDataException on unreadable or broken +            metadata. +        """ +        try: +            if not self.wait_for_metadata_service(): +                raise sources.InvalidMetaDataException( +                    'No active metadata service found') +        except IOError as e: +            raise sources.InvalidMetaDataException( +                'IOError contacting metadata service: {error}'.format( +                    error=str(e))) + +        url_params = self.get_url_params() + +        try: +            result = util.log_time( +                LOG.debug, 'Crawl of openstack metadata service', +                read_metadata_service, args=[self.metadata_address], +                kwargs={'ssl_details': self.ssl_details, +                        'retries': url_params.num_retries, +                        'timeout': url_params.timeout_seconds}) +        except openstack.NonReadable as e: +            raise sources.InvalidMetaDataException(str(e)) +        except (openstack.BrokenMetadata, IOError): +            msg = 'Broken metadata address {addr}'.format( +                addr=self.metadata_address) +            raise sources.InvalidMetaDataException(msg) +        return result + + +class DataSourceOpenStackLocal(DataSourceOpenStack): +    """Run in init-local using a dhcp discovery prior to metadata crawl. + +    In init-local, no network is available. 
This subclass sets up minimal +    networking with dhclient on a viable nic so that it can talk to the +    metadata service. If the metadata service provides network configuration +    then render the network configuration for that instance based on metadata. +    """ + +    perform_dhcp_setup = True  # Get metadata network config if present  def read_metadata_service(base_url, ssl_details=None, @@ -157,8 +217,25 @@ def read_metadata_service(base_url, ssl_details=None,      return reader.read_v2() +def detect_openstack(accept_oracle=False): +    """Return True when a potential OpenStack platform is detected.""" +    if not util.is_x86(): +        return True  # Non-Intel cpus don't properly report dmi product names +    product_name = util.read_dmi_data('system-product-name') +    if product_name in VALID_DMI_PRODUCT_NAMES: +        return True +    elif util.read_dmi_data('chassis-asset-tag') in VALID_DMI_ASSET_TAGS: +        return True +    elif accept_oracle and oracle._is_platform_viable(): +        return True +    elif util.get_proc_env(1).get('product_name') == DMI_PRODUCT_NOVA: +        return True +    return False + +  # Used to match classes to dependencies  datasources = [ +    (DataSourceOpenStackLocal, (sources.DEP_FILESYSTEM,)),      (DataSourceOpenStack, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)),  ] diff --git a/cloudinit/sources/DataSourceOracle.py b/cloudinit/sources/DataSourceOracle.py new file mode 100644 index 00000000..fab39af3 --- /dev/null +++ b/cloudinit/sources/DataSourceOracle.py @@ -0,0 +1,233 @@ +# This file is part of cloud-init. See LICENSE file for license information. +"""Datasource for Oracle (OCI/Oracle Cloud Infrastructure) + +OCI provides a OpenStack like metadata service which provides only +'2013-10-17' and 'latest' versions.. + +Notes: + * This datasource does not support the OCI-Classic. OCI-Classic +   provides an EC2 lookalike metadata service. + * The uuid provided in DMI data is not the same as the meta-data provided +   instance-id, but has an equivalent lifespan. + * We do need to support upgrade from an instance that cloud-init +   identified as OpenStack. + * Both bare-metal and vms use iscsi root + * Both bare-metal and vms provide chassis-asset-tag of OracleCloud.com +""" + +from cloudinit.url_helper import combine_url, readurl, UrlError +from cloudinit.net import dhcp +from cloudinit import net +from cloudinit import sources +from cloudinit import util +from cloudinit.net import cmdline +from cloudinit import log as logging + +import json +import re + +LOG = logging.getLogger(__name__) + +CHASSIS_ASSET_TAG = "OracleCloud.com" +METADATA_ENDPOINT = "http://169.254.169.254/openstack/" + + +class DataSourceOracle(sources.DataSource): + +    dsname = 'Oracle' +    system_uuid = None +    vendordata_pure = None +    _network_config = sources.UNSET + +    def _is_platform_viable(self): +        """Check platform environment to report if this datasource may run.""" +        return _is_platform_viable() + +    def _get_data(self): +        if not self._is_platform_viable(): +            return False + +        # network may be configured if iscsi root.  If that is the case +        # then read_kernel_cmdline_config will return non-None. 
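The comment above sets up the branch that follows: with an iSCSI root the initramfs has already configured networking, so the metadata crawl can run directly; otherwise a short-lived DHCP lease is brought up around it. A minimal standalone sketch of that pattern (the helper names and printed messages are illustrative, not cloud-init APIs):

from contextlib import contextmanager

@contextmanager
def ephemeral_dhcp(nic):
    # Stand-in for an ephemeral DHCP context: acquire a lease, always release it.
    print('dhcp lease acquired on %s' % nic)
    try:
        yield
    finally:
        print('dhcp lease released on %s' % nic)

def get_data(is_iscsi_root, crawl_metadata, nic='eth0'):
    if is_iscsi_root:
        return crawl_metadata()       # network already up via the initramfs
    with ephemeral_dhcp(nic):         # temporary network just for the crawl
        return crawl_metadata()

print(get_data(False, lambda: {'2013-10-17': {'meta_data': {}}}))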
+        if _is_iscsi_root(): +            data = self.crawl_metadata() +        else: +            with dhcp.EphemeralDHCPv4(net.find_fallback_nic()): +                data = self.crawl_metadata() + +        self._crawled_metadata = data +        vdata = data['2013-10-17'] + +        self.userdata_raw = vdata.get('user_data') +        self.system_uuid = vdata['system_uuid'] + +        vd = vdata.get('vendor_data') +        if vd: +            self.vendordata_pure = vd +            try: +                self.vendordata_raw = sources.convert_vendordata(vd) +            except ValueError as e: +                LOG.warning("Invalid content in vendor-data: %s", e) +                self.vendordata_raw = None + +        mdcopies = ('public_keys',) +        md = dict([(k, vdata['meta_data'].get(k)) +                   for k in mdcopies if k in vdata['meta_data']]) + +        mdtrans = ( +            # oracle meta_data.json name, cloudinit.datasource.metadata name +            ('availability_zone', 'availability-zone'), +            ('hostname', 'local-hostname'), +            ('launch_index', 'launch-index'), +            ('uuid', 'instance-id'), +        ) +        for dsname, ciname in mdtrans: +            if dsname in vdata['meta_data']: +                md[ciname] = vdata['meta_data'][dsname] + +        self.metadata = md +        return True + +    def crawl_metadata(self): +        return read_metadata() + +    def check_instance_id(self, sys_cfg): +        """quickly check (local only) if self.instance_id is still valid + +        On Oracle, the dmi-provided system uuid differs from the instance-id +        but has the same life-span.""" +        return sources.instance_id_matches_system_uuid(self.system_uuid) + +    def get_public_ssh_keys(self): +        return sources.normalize_pubkey_data(self.metadata.get('public_keys')) + +    @property +    def network_config(self): +        """Network config is read from initramfs provided files +        If none is present, then we fall back to fallback configuration. + +        One thing to note here is that this method is not currently +        considered at all if there is is kernel/initramfs provided +        data.  In that case, stages considers that the cmdline data +        overrides datasource provided data and does not consult here. + +        We nonetheless return cmdline provided config if present +        and fallback to generate fallback.""" +        if self._network_config == sources.UNSET: +            cmdline_cfg = cmdline.read_kernel_cmdline_config() +            if cmdline_cfg: +                self._network_config = cmdline_cfg +            else: +                self._network_config = self.distro.generate_fallback_config() +        return self._network_config + + +def _read_system_uuid(): +    sys_uuid = util.read_dmi_data('system-uuid') +    return None if sys_uuid is None else sys_uuid.lower() + + +def _is_platform_viable(): +    asset_tag = util.read_dmi_data('chassis-asset-tag') +    return asset_tag == CHASSIS_ASSET_TAG + + +def _is_iscsi_root(): +    return bool(cmdline.read_kernel_cmdline_config()) + + +def _load_index(content): +    """Return a list entries parsed from content. + +    OpenStack's metadata service returns a newline delimited list +    of items.  Oracle's implementation has html formatted list of links. +    The parser here just grabs targets from <a href="target"> +    and throws away "../". 
+ +    Oracle has accepted that to be buggy and may fix in the future +    to instead return a '\n' delimited plain text list.  This function +    will continue to work if that change is made.""" +    if not content.lower().startswith("<html>"): +        return content.splitlines() +    items = re.findall( +        r'href="(?P<target>[^"]*)"', content, re.MULTILINE | re.IGNORECASE) +    return [i for i in items if not i.startswith(".")] + + +def read_metadata(endpoint_base=METADATA_ENDPOINT, sys_uuid=None, +                  version='2013-10-17'): +    """Read metadata, return a dictionary. + +    Each path listed in the index will be represented in the dictionary. +    If the path ends in .json, then the content will be decoded and +    populated into the dictionary. + +    The system uuid (/sys/class/dmi/id/product_uuid) is also populated. +    Example: given paths = ('user_data', 'meta_data.json') +    This would return: +      {version: {'user_data': b'blob', 'meta_data': json.loads(blob.decode()) +                 'system_uuid': '3b54f2e0-3ab2-458d-b770-af9926eee3b2'}} +    """ +    endpoint = combine_url(endpoint_base, version) + "/" +    if sys_uuid is None: +        sys_uuid = _read_system_uuid() +    if not sys_uuid: +        raise sources.BrokenMetadata("Failed to read system uuid.") + +    try: +        resp = readurl(endpoint) +        if not resp.ok(): +            raise sources.BrokenMetadata( +                "Bad response from %s: %s" % (endpoint, resp.code)) +    except UrlError as e: +        raise sources.BrokenMetadata( +            "Failed to read index at %s: %s" % (endpoint, e)) + +    entries = _load_index(resp.contents.decode('utf-8')) +    LOG.debug("index url %s contained: %s", endpoint, entries) + +    # meta_data.json is required. 
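Either index format accepted by the parser above yields the same entry list; a quick self-contained check of that behavior (the index contents here are made-up examples):

import re

def load_index(content):
    # Plain newline-delimited listing, the format Oracle may move to.
    if not content.lower().startswith('<html>'):
        return content.splitlines()
    # HTML listing: keep href targets, drop relative entries like "../".
    items = re.findall(r'href="(?P<target>[^"]*)"', content,
                       re.MULTILINE | re.IGNORECASE)
    return [i for i in items if not i.startswith('.')]

html = '<html><a href="../">../</a><a href="meta_data.json">meta_data.json</a></html>'
plain = 'meta_data.json\nuser_data'
assert load_index(html) == ['meta_data.json']
assert load_index(plain) == ['meta_data.json', 'user_data']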
+    mdj = 'meta_data.json' +    if mdj not in entries: +        raise sources.BrokenMetadata( +            "Required field '%s' missing in index at %s" % (mdj, endpoint)) + +    ret = {'system_uuid': sys_uuid} +    for path in entries: +        response = readurl(combine_url(endpoint, path)) +        if path.endswith(".json"): +            ret[path.rpartition(".")[0]] = ( +                json.loads(response.contents.decode('utf-8'))) +        else: +            ret[path] = response.contents + +    return {version: ret} + + +# Used to match classes to dependencies +datasources = [ +    (DataSourceOracle, (sources.DEP_FILESYSTEM,)), +] + + +# Return a list of data sources that match this set of dependencies +def get_datasource_list(depends): +    return sources.list_from_depends(depends, datasources) + + +if __name__ == "__main__": +    import argparse +    import os + +    parser = argparse.ArgumentParser(description='Query Oracle Cloud Metadata') +    parser.add_argument("--endpoint", metavar="URL", +                        help="The url of the metadata service.", +                        default=METADATA_ENDPOINT) +    args = parser.parse_args() +    sys_uuid = "uuid-not-available-not-root" if os.geteuid() != 0 else None + +    data = read_metadata(endpoint_base=args.endpoint, sys_uuid=sys_uuid) +    data['is_platform_viable'] = _is_platform_viable() +    print(util.json_dumps(data)) + +# vi: ts=4 expandtab diff --git a/cloudinit/sources/DataSourceScaleway.py b/cloudinit/sources/DataSourceScaleway.py index e2502b02..9dc4ab23 100644 --- a/cloudinit/sources/DataSourceScaleway.py +++ b/cloudinit/sources/DataSourceScaleway.py @@ -29,7 +29,9 @@ from cloudinit import log as logging  from cloudinit import sources  from cloudinit import url_helper  from cloudinit import util - +from cloudinit import net +from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError +from cloudinit.event import EventType  LOG = logging.getLogger(__name__) @@ -168,8 +170,8 @@ def query_data_api(api_type, api_address, retries, timeout):  class DataSourceScaleway(sources.DataSource): -      dsname = "Scaleway" +    update_events = {'network': [EventType.BOOT_NEW_INSTANCE, EventType.BOOT]}      def __init__(self, sys_cfg, distro, paths):          super(DataSourceScaleway, self).__init__(sys_cfg, distro, paths) @@ -185,11 +187,10 @@ class DataSourceScaleway(sources.DataSource):          self.retries = int(self.ds_cfg.get('retries', DEF_MD_RETRIES))          self.timeout = int(self.ds_cfg.get('timeout', DEF_MD_TIMEOUT)) +        self._fallback_interface = None +        self._network_config = None -    def _get_data(self): -        if not on_scaleway(): -            return False - +    def _crawl_metadata(self):          resp = url_helper.readurl(self.metadata_address,                                    timeout=self.timeout,                                    retries=self.retries) @@ -203,9 +204,48 @@ class DataSourceScaleway(sources.DataSource):              'vendor-data', self.vendordata_address,              self.retries, self.timeout          ) + +    def _get_data(self): +        if not on_scaleway(): +            return False + +        if self._fallback_interface is None: +            self._fallback_interface = net.find_fallback_nic() +        try: +            with EphemeralDHCPv4(self._fallback_interface): +                util.log_time( +                    logfunc=LOG.debug, msg='Crawl of metadata service', +                    func=self._crawl_metadata) +        except (NoDHCPLeaseError) as e: +          
  util.logexc(LOG, str(e)) +            return False          return True      @property +    def network_config(self): +        """ +        Configure networking according to data received from the +        metadata API. +        """ +        if self._network_config: +            return self._network_config + +        if self._fallback_interface is None: +            self._fallback_interface = net.find_fallback_nic() + +        netcfg = {'type': 'physical', 'name': '%s' % self._fallback_interface} +        subnets = [{'type': 'dhcp4'}] +        if self.metadata['ipv6']: +            subnets += [{'type': 'static', +                         'address': '%s' % self.metadata['ipv6']['address'], +                         'gateway': '%s' % self.metadata['ipv6']['gateway'], +                         'netmask': '%s' % self.metadata['ipv6']['netmask'], +                         }] +        netcfg['subnets'] = subnets +        self._network_config = {'version': 1, 'config': [netcfg]} +        return self._network_config + +    @property      def launch_index(self):          return None @@ -228,7 +268,7 @@ class DataSourceScaleway(sources.DataSource):  datasources = [ -    (DataSourceScaleway, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)), +    (DataSourceScaleway, (sources.DEP_FILESYSTEM,)),  ] diff --git a/cloudinit/sources/DataSourceSmartOS.py b/cloudinit/sources/DataSourceSmartOS.py index 86bfa5d8..593ac91a 100644 --- a/cloudinit/sources/DataSourceSmartOS.py +++ b/cloudinit/sources/DataSourceSmartOS.py @@ -1,4 +1,5 @@  # Copyright (C) 2013 Canonical Ltd. +# Copyright (c) 2018, Joyent, Inc.  #  # Author: Ben Howard <ben.howard@canonical.com>  # @@ -10,17 +11,19 @@  #    SmartOS hosts use a serial console (/dev/ttyS1) on KVM Linux Guests  #        The meta-data is transmitted via key/value pairs made by  #        requests on the console. For example, to get the hostname, you -#        would send "GET hostname" on /dev/ttyS1. +#        would send "GET sdc:hostname" on /dev/ttyS1.  #        For Linux Guests running in LX-Brand Zones on SmartOS hosts  #        a socket (/native/.zonecontrol/metadata.sock) is used instead  #        of a serial console.  
#  #   Certain behavior is defined by the DataDictionary -#       http://us-east.manta.joyent.com/jmc/public/mdata/datadict.html +#       https://eng.joyent.com/mdata/datadict.html  #       Comments with "@datadictionary" are snippets of the definition  import base64  import binascii +import errno +import fcntl  import json  import os  import random @@ -108,7 +111,7 @@ BUILTIN_CLOUD_CONFIG = {                         'overwrite': False}      },      'fs_setup': [{'label': 'ephemeral0', -                  'filesystem': 'ext3', +                  'filesystem': 'ext4',                    'device': 'ephemeral0'}],  } @@ -162,9 +165,8 @@ class DataSourceSmartOS(sources.DataSource):      dsname = "Joyent" -    _unset = "_unset" -    smartos_type = _unset -    md_client = _unset +    smartos_type = sources.UNSET +    md_client = sources.UNSET      def __init__(self, sys_cfg, distro, paths):          sources.DataSource.__init__(self, sys_cfg, distro, paths) @@ -186,12 +188,12 @@ class DataSourceSmartOS(sources.DataSource):          return "%s [client=%s]" % (root, self.md_client)      def _init(self): -        if self.smartos_type == self._unset: +        if self.smartos_type == sources.UNSET:              self.smartos_type = get_smartos_environ()              if self.smartos_type is None:                  self.md_client = None -        if self.md_client == self._unset: +        if self.md_client == sources.UNSET:              self.md_client = jmc_client_factory(                  smartos_type=self.smartos_type,                  metadata_sockfile=self.ds_cfg['metadata_sockfile'], @@ -229,6 +231,9 @@ class DataSourceSmartOS(sources.DataSource):                        self.md_client)              return False +        # Open once for many requests, rather than once for each request +        self.md_client.open_transport() +          for ci_noun, attribute in SMARTOS_ATTRIB_MAP.items():              smartos_noun, strip = attribute              md[ci_noun] = self.md_client.get(smartos_noun, strip=strip) @@ -236,6 +241,8 @@ class DataSourceSmartOS(sources.DataSource):          for ci_noun, smartos_noun in SMARTOS_ATTRIB_JSON.items():              md[ci_noun] = self.md_client.get_json(smartos_noun) +        self.md_client.close_transport() +          # @datadictionary: This key may contain a program that is written          # to a file in the filesystem of the guest on each boot and then          # executed. It may be of any format that would be considered @@ -266,8 +273,14 @@ class DataSourceSmartOS(sources.DataSource):          write_boot_content(u_data, u_data_f)          # Handle the cloud-init regular meta + +        # The hostname may or may not be qualified with the local domain name. +        # This follows section 3.14 of RFC 2132.          
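The hostname handling that follows boils down to a small preference chain: keep a provided local-hostname, otherwise use sdc:hostname (which may already be fully qualified per RFC 2132 section 3.14), and only then fall back to the instance id. A condensed sketch of that chain (dictionary contents are illustrative):

def pick_local_hostname(md):
    # Prefer an explicit local-hostname, then the (possibly fully qualified)
    # hostname, and finally the instance id as a last resort.
    return md.get('local-hostname') or md.get('hostname') or md['instance-id']

assert pick_local_hostname({'instance-id': 'i-1'}) == 'i-1'
assert pick_local_hostname(
    {'hostname': 'web01.example.com', 'instance-id': 'i-1'}) == 'web01.example.com'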
if not md['local-hostname']: -            md['local-hostname'] = md['instance-id'] +            if md['hostname']: +                md['local-hostname'] = md['hostname'] +            else: +                md['local-hostname'] = md['instance-id']          ud = None          if md['user-data']: @@ -285,6 +298,7 @@ class DataSourceSmartOS(sources.DataSource):          self.userdata_raw = ud          self.vendordata_raw = md['vendor-data']          self.network_data = md['network-data'] +        self.routes_data = md['routes']          self._set_provisioned()          return True @@ -308,7 +322,8 @@ class DataSourceSmartOS(sources.DataSource):                      convert_smartos_network_data(                          network_data=self.network_data,                          dns_servers=self.metadata['dns_servers'], -                        dns_domain=self.metadata['dns_domain'])) +                        dns_domain=self.metadata['dns_domain'], +                        routes=self.routes_data))          return self._network_config @@ -316,6 +331,10 @@ class JoyentMetadataFetchException(Exception):      pass +class JoyentMetadataTimeoutException(JoyentMetadataFetchException): +    pass + +  class JoyentMetadataClient(object):      """      A client implementing v2 of the Joyent Metadata Protocol Specification. @@ -360,6 +379,47 @@ class JoyentMetadataClient(object):          LOG.debug('Value "%s" found.', value)          return value +    def _readline(self): +        """ +           Reads a line a byte at a time until \n is encountered.  Returns an +           ascii string with the trailing newline removed. + +           If a timeout (per-byte) is set and it expires, a +           JoyentMetadataFetchException will be thrown. +        """ +        response = [] + +        def as_ascii(): +            return b''.join(response).decode('ascii') + +        msg = "Partial response: '%s'" +        while True: +            try: +                byte = self.fp.read(1) +                if len(byte) == 0: +                    raise JoyentMetadataTimeoutException(msg % as_ascii()) +                if byte == b'\n': +                    return as_ascii() +                response.append(byte) +            except OSError as exc: +                if exc.errno == errno.EAGAIN: +                    raise JoyentMetadataTimeoutException(msg % as_ascii()) +                raise + +    def _write(self, msg): +        self.fp.write(msg.encode('ascii')) +        self.fp.flush() + +    def _negotiate(self): +        LOG.debug('Negotiating protocol V2') +        self._write('NEGOTIATE V2\n') +        response = self._readline() +        LOG.debug('read "%s"', response) +        if response != 'V2_OK': +            raise JoyentMetadataFetchException( +                'Invalid response "%s" to "NEGOTIATE V2"' % response) +        LOG.debug('Negotiation complete') +      def request(self, rtype, param=None):          request_id = '{0:08x}'.format(random.randint(0, 0xffffffff))          message_body = ' '.join((request_id, rtype,)) @@ -374,18 +434,11 @@ class JoyentMetadataClient(object):              self.open_transport()              need_close = True -        self.fp.write(msg.encode('ascii')) -        self.fp.flush() - -        response = bytearray() -        response.extend(self.fp.read(1)) -        while response[-1:] != b'\n': -            response.extend(self.fp.read(1)) - +        self._write(msg) +        response = self._readline()          if need_close:              self.close_transport() -        response = 
response.rstrip().decode('ascii')          LOG.debug('Read "%s" from metadata transport.', response)          if 'SUCCESS' not in response: @@ -410,9 +463,9 @@ class JoyentMetadataClient(object):      def list(self):          result = self.request(rtype='KEYS') -        if result: -            result = result.split('\n') -        return result +        if not result: +            return [] +        return result.split('\n')      def put(self, key, val):          param = b' '.join([base64.b64encode(i.encode()) @@ -450,6 +503,7 @@ class JoyentMetadataSocketClient(JoyentMetadataClient):          sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)          sock.connect(self.socketpath)          self.fp = sock.makefile('rwb') +        self._negotiate()      def exists(self):          return os.path.exists(self.socketpath) @@ -459,8 +513,9 @@ class JoyentMetadataSocketClient(JoyentMetadataClient):  class JoyentMetadataSerialClient(JoyentMetadataClient): -    def __init__(self, device, timeout=10, smartos_type=SMARTOS_ENV_KVM): -        super(JoyentMetadataSerialClient, self).__init__(smartos_type) +    def __init__(self, device, timeout=10, smartos_type=SMARTOS_ENV_KVM, +                 fp=None): +        super(JoyentMetadataSerialClient, self).__init__(smartos_type, fp)          self.device = device          self.timeout = timeout @@ -468,10 +523,51 @@ class JoyentMetadataSerialClient(JoyentMetadataClient):          return os.path.exists(self.device)      def open_transport(self): -        ser = serial.Serial(self.device, timeout=self.timeout) -        if not ser.isOpen(): -            raise SystemError("Unable to open %s" % self.device) -        self.fp = ser +        if self.fp is None: +            ser = serial.Serial(self.device, timeout=self.timeout) +            if not ser.isOpen(): +                raise SystemError("Unable to open %s" % self.device) +            self.fp = ser +            fcntl.lockf(ser, fcntl.LOCK_EX) +        self._flush() +        self._negotiate() + +    def _flush(self): +        LOG.debug('Flushing input') +        # Read any pending data +        timeout = self.fp.timeout +        self.fp.timeout = 0.1 +        while True: +            try: +                self._readline() +            except JoyentMetadataTimeoutException: +                break +        LOG.debug('Input empty') + +        # Send a newline and expect "invalid command".  Keep trying until +        # successful.  Retry rather frequently so that the "Is the host +        # metadata service running" appears on the console soon after someone +        # attaches in an effort to debug. +        if timeout > 5: +            self.fp.timeout = 5 +        else: +            self.fp.timeout = timeout +        while True: +            LOG.debug('Writing newline, expecting "invalid command"') +            self._write('\n') +            try: +                response = self._readline() +                if response == 'invalid command': +                    break +                if response == 'FAILURE': +                    LOG.debug('Got "FAILURE".  Retrying.') +                    continue +                LOG.warning('Unexpected response "%s" during flush', response) +            except JoyentMetadataTimeoutException: +                LOG.warning('Timeout while initializing metadata client. ' +                            'Is the host metadata service running?') +        LOG.debug('Got "invalid command".  
Flush complete.') +        self.fp.timeout = timeout      def __repr__(self):          return "%s(device=%s, timeout=%s)" % ( @@ -587,6 +683,18 @@ def jmc_client_factory(      raise ValueError("Unknown value for smartos_type: %s" % smartos_type) +def identify_file(content_f): +    cmd = ["file", "--brief", "--mime-type", content_f] +    f_type = None +    try: +        (f_type, _err) = util.subp(cmd) +        LOG.debug("script %s mime type is %s", content_f, f_type) +    except util.ProcessExecutionError as e: +        util.logexc( +            LOG, ("Failed to identify script type for %s" % content_f, e)) +    return None if f_type is None else f_type.strip() + +  def write_boot_content(content, content_f, link=None, shebang=False,                         mode=0o400):      """ @@ -619,18 +727,11 @@ def write_boot_content(content, content_f, link=None, shebang=False,      util.write_file(content_f, content, mode=mode)      if shebang and not content.startswith("#!"): -        try: -            cmd = ["file", "--brief", "--mime-type", content_f] -            (f_type, _err) = util.subp(cmd) -            LOG.debug("script %s mime type is %s", content_f, f_type) -            if f_type.strip() == "text/plain": -                new_content = "\n".join(["#!/bin/bash", content]) -                util.write_file(content_f, new_content, mode=mode) -                LOG.debug("added shebang to file %s", content_f) - -        except Exception as e: -            util.logexc(LOG, ("Failed to identify script type for %s" % -                              content_f, e)) +        f_type = identify_file(content_f) +        if f_type == "text/plain": +            util.write_file( +                content_f, "\n".join(["#!/bin/bash", content]), mode=mode) +            LOG.debug("added shebang to file %s", content_f)      if link:          try: @@ -650,7 +751,7 @@ def get_smartos_environ(uname_version=None, product_name=None):      # report 'BrandZ virtual linux' as the kernel version      if uname_version is None:          uname_version = uname[3] -    if uname_version.lower() == 'brandz virtual linux': +    if uname_version == 'BrandZ virtual linux':          return SMARTOS_ENV_LX_BRAND      if product_name is None: @@ -658,7 +759,7 @@ def get_smartos_environ(uname_version=None, product_name=None):      else:          system_type = product_name -    if system_type and 'smartdc' in system_type.lower(): +    if system_type and system_type.startswith('SmartDC'):          return SMARTOS_ENV_KVM      return None @@ -666,7 +767,8 @@ def get_smartos_environ(uname_version=None, product_name=None):  # Convert SMARTOS 'sdc:nics' data to network_config yaml  def convert_smartos_network_data(network_data=None, -                                 dns_servers=None, dns_domain=None): +                                 dns_servers=None, dns_domain=None, +                                 routes=None):      """Return a dictionary of network_config by parsing provided         SMARTOS sdc:nics configuration data @@ -684,6 +786,10 @@ def convert_smartos_network_data(network_data=None,      keys are related to ip configuration.  For each ip in the 'ips' list      we create a subnet entry under 'subnets' pairing the ip to a one in      the 'gateways' list. + +    Each route in sdc:routes is mapped to a route on each interface. +    The sdc:routes properties 'dst' and 'gateway' map to 'network' and +    'gateway'.  The 'linklocal' sdc:routes property is ignored.      
"""      valid_keys = { @@ -706,6 +812,10 @@ def convert_smartos_network_data(network_data=None,              'scope',              'type',          ], +        'route': [ +            'network', +            'gateway', +        ],      }      if dns_servers: @@ -720,6 +830,9 @@ def convert_smartos_network_data(network_data=None,      else:          dns_domain = [] +    if not routes: +        routes = [] +      def is_valid_ipv4(addr):          return '.' in addr @@ -746,6 +859,7 @@ def convert_smartos_network_data(network_data=None,              if ip == "dhcp":                  subnet = {'type': 'dhcp4'}              else: +                routeents = []                  subnet = dict((k, v) for k, v in nic.items()                                if k in valid_keys['subnet'])                  subnet.update({ @@ -767,6 +881,25 @@ def convert_smartos_network_data(network_data=None,                              pgws[proto]['gw'] = gateways[0]                              subnet.update({'gateway': pgws[proto]['gw']}) +                for route in routes: +                    rcfg = dict((k, v) for k, v in route.items() +                                if k in valid_keys['route']) +                    # Linux uses the value of 'gateway' to determine +                    # automatically if the route is a forward/next-hop +                    # (non-local IP for gateway) or an interface/resolver +                    # (local IP for gateway).  So we can ignore the +                    # 'interface' attribute of sdc:routes, because SDC +                    # guarantees that the gateway is a local IP for +                    # "interface=true". +                    # +                    # Eventually we should be smart and compare "gateway" +                    # to see if it's in the prefix.  We can then smartly +                    # add or not-add this route.  But for now, +                    # when in doubt, use brute force! Routes for everyone! +                    rcfg.update({'network': route['dst']}) +                    routeents.append(rcfg) +                    subnet.update({'routes': routeents}) +              subnets.append(subnet)          cfg.update({'subnets': subnets})          config.append(cfg) @@ -810,12 +943,14 @@ if __name__ == "__main__":              keyname = SMARTOS_ATTRIB_JSON[key]              data[key] = client.get_json(keyname)          elif key == "network_config": -            for depkey in ('network-data', 'dns_servers', 'dns_domain'): +            for depkey in ('network-data', 'dns_servers', 'dns_domain', +                           'routes'):                  load_key(client, depkey, data)              data[key] = convert_smartos_network_data(                  network_data=data['network-data'],                  dns_servers=data['dns_servers'], -                dns_domain=data['dns_domain']) +                dns_domain=data['dns_domain'], +                routes=data['routes'])          else:              if key in SMARTOS_ATTRIB_MAP:                  keyname, strip = SMARTOS_ATTRIB_MAP[key] diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index df0b374a..5ac98826 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -9,6 +9,7 @@  # This file is part of cloud-init. See LICENSE file for license information.  
import abc +from collections import namedtuple  import copy  import json  import os @@ -17,6 +18,8 @@ import six  from cloudinit.atomic_helper import write_json  from cloudinit import importer  from cloudinit import log as logging +from cloudinit import net +from cloudinit.event import EventType  from cloudinit import type_utils  from cloudinit import user_data as ud  from cloudinit import util @@ -35,12 +38,23 @@ DEP_FILESYSTEM = "FILESYSTEM"  DEP_NETWORK = "NETWORK"  DS_PREFIX = 'DataSource' -# File in which instance meta-data, user-data and vendor-data is written +EXPERIMENTAL_TEXT = ( +    "EXPERIMENTAL: The structure and format of content scoped under the 'ds'" +    " key may change in subsequent releases of cloud-init.") + + +# File in which public available instance meta-data is written +# security-sensitive key values are redacted from this world-readable file  INSTANCE_JSON_FILE = 'instance-data.json' +# security-sensitive key values are present in this root-readable file +INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json' +REDACT_SENSITIVE_VALUE = 'redacted for non-root user'  # Key which can be provide a cloud's official product name to cloud-init  METADATA_CLOUD_NAME_KEY = 'cloud-name' +UNSET = "_unset" +  LOG = logging.getLogger(__name__) @@ -48,26 +62,64 @@ class DataSourceNotFoundException(Exception):      pass -def process_base64_metadata(metadata, key_path=''): -    """Strip ci-b64 prefix and return metadata with base64-encoded-keys set.""" +class InvalidMetaDataException(Exception): +    """Raised when metadata is broken, unavailable or disabled.""" +    pass + + +def process_instance_metadata(metadata, key_path='', sensitive_keys=()): +    """Process all instance metadata cleaning it up for persisting as json. + +    Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list + +    @return Dict copy of processed metadata. +    """      md_copy = copy.deepcopy(metadata) -    md_copy['base64-encoded-keys'] = [] +    md_copy['base64_encoded_keys'] = [] +    md_copy['sensitive_keys'] = []      for key, val in metadata.items():          if key_path:              sub_key_path = key_path + '/' + key          else:              sub_key_path = key +        if key in sensitive_keys or sub_key_path in sensitive_keys: +            md_copy['sensitive_keys'].append(sub_key_path)          if isinstance(val, str) and val.startswith('ci-b64:'): -            md_copy['base64-encoded-keys'].append(sub_key_path) +            md_copy['base64_encoded_keys'].append(sub_key_path)              md_copy[key] = val.replace('ci-b64:', '')          if isinstance(val, dict): -            return_val = process_base64_metadata(val, sub_key_path) -            md_copy['base64-encoded-keys'].extend( -                return_val.pop('base64-encoded-keys')) +            return_val = process_instance_metadata( +                val, sub_key_path, sensitive_keys) +            md_copy['base64_encoded_keys'].extend( +                return_val.pop('base64_encoded_keys')) +            md_copy['sensitive_keys'].extend( +                return_val.pop('sensitive_keys'))              md_copy[key] = return_val      return md_copy +def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): +    """Redact any sensitive keys from to provided metadata dictionary. + +    Replace any keys values listed in 'sensitive_keys' with redact_value. 
+    """ +    if not metadata.get('sensitive_keys', []): +        return metadata +    md_copy = copy.deepcopy(metadata) +    for key_path in metadata.get('sensitive_keys'): +        path_parts = key_path.split('/') +        obj = md_copy +        for path in path_parts: +            if isinstance(obj[path], dict) and path != path_parts[-1]: +                obj = obj[path] +        obj[path] = redact_value +    return md_copy + + +URLParams = namedtuple( +    'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) + +  @six.add_metaclass(abc.ABCMeta)  class DataSource(object): @@ -81,6 +133,37 @@ class DataSource(object):      # Cached cloud_name as determined by _get_cloud_name      _cloud_name = None +    # Track the discovered fallback nic for use in configuration generation. +    _fallback_interface = None + +    # read_url_params +    url_max_wait = -1   # max_wait < 0 means do not wait +    url_timeout = 10    # timeout for each metadata url read attempt +    url_retries = 5     # number of times to retry url upon 404 + +    # The datasource defines a set of supported EventTypes during which +    # the datasource can react to changes in metadata and regenerate +    # network configuration on metadata changes. +    # A datasource which supports writing network config on each system boot +    # would call update_events['network'].add(EventType.BOOT). + +    # Default: generate network config on new instance id (first boot). +    update_events = {'network': set([EventType.BOOT_NEW_INSTANCE])} + +    # N-tuple listing default values for any metadata-related class +    # attributes cached on an instance by a process_data runs. These attribute +    # values are reset via clear_cached_attrs during any update_metadata call. +    cached_attr_defaults = ( +        ('ec2_metadata', UNSET), ('network_json', UNSET), +        ('metadata', {}), ('userdata', None), ('userdata_raw', None), +        ('vendordata', None), ('vendordata_raw', None)) + +    _dirty_cache = False + +    # N-tuple of keypaths or keynames redact from instance-data.json for +    # non-root users +    sensitive_metadata_keys = ('security-credentials',) +      def __init__(self, sys_cfg, distro, paths, ud_proc=None):          self.sys_cfg = sys_cfg          self.distro = distro @@ -106,49 +189,140 @@ class DataSource(object):      def _get_standardized_metadata(self):          """Return a dictionary of standardized metadata keys.""" -        return {'v1': { -            'local-hostname': self.get_hostname(), -            'instance-id': self.get_instance_id(), -            'cloud-name': self.cloud_name, -            'region': self.region, -            'availability-zone': self.availability_zone}} +        local_hostname = self.get_hostname() +        instance_id = self.get_instance_id() +        availability_zone = self.availability_zone +        cloud_name = self.cloud_name +        # When adding new standard keys prefer underscore-delimited instead +        # of hyphen-delimted to support simple variable references in jinja +        # templates. 
+        return { +            'v1': { +                'availability-zone': availability_zone, +                'availability_zone': availability_zone, +                'cloud-name': cloud_name, +                'cloud_name': cloud_name, +                'instance-id': instance_id, +                'instance_id': instance_id, +                'local-hostname': local_hostname, +                'local_hostname': local_hostname, +                'region': self.region}} + +    def clear_cached_attrs(self, attr_defaults=()): +        """Reset any cached metadata attributes to datasource defaults. + +        @param attr_defaults: Optional tuple of (attr, value) pairs to +           set instead of cached_attr_defaults. +        """ +        if not self._dirty_cache: +            return +        if attr_defaults: +            attr_values = attr_defaults +        else: +            attr_values = self.cached_attr_defaults + +        for attribute, value in attr_values: +            if hasattr(self, attribute): +                setattr(self, attribute, value) +        if not attr_defaults: +            self._dirty_cache = False      def get_data(self):          """Datasources implement _get_data to setup metadata and userdata_raw.          Minimally, the datasource should return a boolean True on success.          """ +        self._dirty_cache = True          return_value = self._get_data() -        json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE)          if not return_value:              return return_value +        self.persist_instance_data() +        return return_value +    def persist_instance_data(self): +        """Process and write INSTANCE_JSON_FILE with all instance metadata. + +        Replace any hyphens with underscores in key names for use in template +        processing. + +        @return True on successful write, False otherwise. +        """          instance_data = { -            'ds': { -                'meta-data': self.metadata, -                'user-data': self.get_userdata_raw(), -                'vendor-data': self.get_vendordata_raw()}} +            'ds': {'_doc': EXPERIMENTAL_TEXT, +                   'meta_data': self.metadata}} +        if hasattr(self, 'network_json'): +            network_json = getattr(self, 'network_json') +            if network_json != UNSET: +                instance_data['ds']['network_json'] = network_json +        if hasattr(self, 'ec2_metadata'): +            ec2_metadata = getattr(self, 'ec2_metadata') +            if ec2_metadata != UNSET: +                instance_data['ds']['ec2_metadata'] = ec2_metadata          instance_data.update(              self._get_standardized_metadata())          try:              # Process content base64encoding unserializable values              content = util.json_dumps(instance_data) -            # Strip base64: prefix and return base64-encoded-keys -            processed_data = process_base64_metadata(json.loads(content)) +            # Strip base64: prefix and set base64_encoded_keys list. 
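The comment above covers two bookkeeping jobs done while persisting: strip the 'ci-b64:' marker from values and record which key paths carried it, so consumers of instance-data.json know what still needs base64-decoding. A simplified standalone version of that walk (the sample metadata is made up):

def strip_ci_b64(metadata, key_path=''):
    # Returns (cleaned_copy, list_of_base64_encoded_key_paths).
    cleaned, encoded = {}, []
    for key, val in metadata.items():
        sub = '/'.join(p for p in (key_path, key) if p)
        if isinstance(val, str) and val.startswith('ci-b64:'):
            encoded.append(sub)
            val = val[len('ci-b64:'):]
        elif isinstance(val, dict):
            val, nested = strip_ci_b64(val, sub)
            encoded.extend(nested)
        cleaned[key] = val
    return cleaned, encoded

md, keys = strip_ci_b64({'ds': {'meta_data': {'user_data': 'ci-b64:aGkK'}}})
assert keys == ['ds/meta_data/user_data']
assert md['ds']['meta_data']['user_data'] == 'aGkK'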
+            processed_data = process_instance_metadata( +                json.loads(content), +                sensitive_keys=self.sensitive_metadata_keys)          except TypeError as e:              LOG.warning('Error persisting instance-data.json: %s', str(e)) -            return return_value +            return False          except UnicodeDecodeError as e:              LOG.warning('Error persisting instance-data.json: %s', str(e)) -            return return_value -        write_json(json_file, processed_data, mode=0o600) -        return return_value +            return False +        json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) +        write_json(json_file, processed_data)  # World readable +        json_sensitive_file = os.path.join(self.paths.run_dir, +                                           INSTANCE_JSON_SENSITIVE_FILE) +        write_json(json_sensitive_file, +                   redact_sensitive_keys(processed_data), mode=0o600) +        return True      def _get_data(self): +        """Walk metadata sources, process crawled data and save attributes."""          raise NotImplementedError(              'Subclasses of DataSource must implement _get_data which'              ' sets self.metadata, vendordata_raw and userdata_raw.') +    def get_url_params(self): +        """Return the Datasource's prefered url_read parameters. + +        Subclasses may override url_max_wait, url_timeout, url_retries. + +        @return: A URLParams object with max_wait_seconds, timeout_seconds, +            num_retries. +        """ +        max_wait = self.url_max_wait +        try: +            max_wait = int(self.ds_cfg.get("max_wait", self.url_max_wait)) +        except ValueError: +            util.logexc( +                LOG, "Config max_wait '%s' is not an int, using default '%s'", +                self.ds_cfg.get("max_wait"), max_wait) + +        timeout = self.url_timeout +        try: +            timeout = max( +                0, int(self.ds_cfg.get("timeout", self.url_timeout))) +        except ValueError: +            timeout = self.url_timeout +            util.logexc( +                LOG, "Config timeout '%s' is not an int, using default '%s'", +                self.ds_cfg.get('timeout'), timeout) + +        retries = self.url_retries +        try: +            retries = int(self.ds_cfg.get("retries", self.url_retries)) +        except Exception: +            util.logexc( +                LOG, "Config retries '%s' is not an int, using default '%s'", +                self.ds_cfg.get('retries'), retries) + +        return URLParams(max_wait, timeout, retries) +      def get_userdata(self, apply_filter=False):          if self.userdata is None:              self.userdata = self.ud_proc.process(self.get_userdata_raw()) @@ -162,6 +336,17 @@ class DataSource(object):          return self.vendordata      @property +    def fallback_interface(self): +        """Determine the network interface used during local network config.""" +        if self._fallback_interface is None: +            self._fallback_interface = net.find_fallback_nic() +            if self._fallback_interface is None: +                LOG.warning( +                    "Did not find a fallback interface on %s.", +                    self.cloud_name) +        return self._fallback_interface + +    @property      def cloud_name(self):          """Return lowercase cloud name as determined by the datasource. 
@@ -340,6 +525,43 @@ class DataSource(object):      def get_package_mirror_info(self):          return self.distro.get_package_mirror_info(data_source=self) +    def update_metadata(self, source_event_types): +        """Refresh cached metadata if the datasource supports this event. + +        The datasource has a list of update_events which +        trigger refreshing all cached metadata as well as refreshing the +        network configuration. + +        @param source_event_types: List of EventTypes which may trigger a +            metadata update. + +        @return True if the datasource did successfully update cached metadata +            due to source_event_type. +        """ +        supported_events = {} +        for event in source_event_types: +            for update_scope, update_events in self.update_events.items(): +                if event in update_events: +                    if not supported_events.get(update_scope): +                        supported_events[update_scope] = set() +                    supported_events[update_scope].add(event) +        for scope, matched_events in supported_events.items(): +            LOG.debug( +                "Update datasource metadata and %s config due to events: %s", +                scope, ', '.join(matched_events)) +            # Each datasource has a cached config property which needs clearing +            # Once cleared that config property will be regenerated from +            # current metadata. +            self.clear_cached_attrs((('_%s_config' % scope, UNSET),)) +        if supported_events: +            self.clear_cached_attrs() +            result = self.get_data() +            if result: +                return True +        LOG.debug("Datasource %s not updated for events: %s", self, +                  ', '.join(source_event_types)) +        return False +      def check_instance_id(self, sys_cfg):          # quickly (local check only) if self.instance_id is still          return False @@ -444,7 +666,7 @@ def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list, reporter):              with myrep:                  LOG.debug("Seeing if we can get any data from %s", cls)                  s = cls(sys_cfg, distro, paths) -                if s.get_data(): +                if s.update_metadata([EventType.BOOT_NEW_INSTANCE]):                      myrep.message = "found %s data from %s" % (mode, name)                      return (s, type_utils.obj_name(cls))          except Exception: @@ -517,6 +739,10 @@ def convert_vendordata(data, recurse=True):      raise ValueError("Unknown data type for vendordata: %s" % type(data)) +class BrokenMetadata(IOError): +    pass + +  # 'depends' is a list of dependencies (DEP_FILESYSTEM)  # ds_list is a list of 2 item lists  # ds_list = [ diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index 90c12df1..e5696b1f 100644 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -14,6 +14,7 @@ from cloudinit import temp_utils  from contextlib import contextmanager  from xml.etree import ElementTree +from cloudinit import url_helper  from cloudinit import util  LOG = logging.getLogger(__name__) @@ -55,14 +56,14 @@ class AzureEndpointHttpClient(object):          if secure:              headers = self.headers.copy()              headers.update(self.extra_secure_headers) -        return util.read_file_or_url(url, headers=headers) +        return url_helper.read_file_or_url(url, headers=headers)      def post(self, url, data=None, 
extra_headers=None):          headers = self.headers          if extra_headers is not None:              headers = self.headers.copy()              headers.update(extra_headers) -        return util.read_file_or_url(url, data=data, headers=headers) +        return url_helper.read_file_or_url(url, data=data, headers=headers)  class GoalState(object): diff --git a/cloudinit/sources/helpers/digitalocean.py b/cloudinit/sources/helpers/digitalocean.py index 693f8d5c..0e7cccac 100644 --- a/cloudinit/sources/helpers/digitalocean.py +++ b/cloudinit/sources/helpers/digitalocean.py @@ -41,10 +41,9 @@ def assign_ipv4_link_local(nic=None):                             "address")      try: -        (result, _err) = util.subp(ip_addr_cmd) +        util.subp(ip_addr_cmd)          LOG.debug("assigned ip4LL address '%s' to '%s'", addr, nic) - -        (result, _err) = util.subp(ip_link_cmd) +        util.subp(ip_link_cmd)          LOG.debug("brought device '%s' up", nic)      except Exception:          util.logexc(LOG, "ip4LL address assignment of '%s' to '%s' failed." @@ -75,7 +74,7 @@ def del_ipv4_link_local(nic=None):      ip_addr_cmd = ['ip', 'addr', 'flush', 'dev', nic]      try: -        (result, _err) = util.subp(ip_addr_cmd) +        util.subp(ip_addr_cmd)          LOG.debug("removed ip4LL addresses from %s", nic)      except Exception as e: diff --git a/cloudinit/sources/helpers/openstack.py b/cloudinit/sources/helpers/openstack.py index 26f3168d..9c29ceac 100644 --- a/cloudinit/sources/helpers/openstack.py +++ b/cloudinit/sources/helpers/openstack.py @@ -21,6 +21,8 @@ from cloudinit import sources  from cloudinit import url_helper  from cloudinit import util +from cloudinit.sources import BrokenMetadata +  # See https://docs.openstack.org/user-guide/cli-config-drive.html  LOG = logging.getLogger(__name__) @@ -36,21 +38,38 @@ KEY_COPIES = (      ('local-hostname', 'hostname', False),      ('instance-id', 'uuid', True),  ) + +# Versions and names taken from nova source nova/api/metadata/base.py  OS_LATEST = 'latest'  OS_FOLSOM = '2012-08-10'  OS_GRIZZLY = '2013-04-04'  OS_HAVANA = '2013-10-17'  OS_LIBERTY = '2015-10-15' +# NEWTON_ONE adds 'devices' to md (sriov-pf-passthrough-neutron-port-vlan) +OS_NEWTON_ONE = '2016-06-30' +# NEWTON_TWO adds vendor_data2.json (vendordata-reboot) +OS_NEWTON_TWO = '2016-10-06' +# OS_OCATA adds 'vif' field to devices (sriov-pf-passthrough-neutron-port-vlan) +OS_OCATA = '2017-02-22' +# OS_ROCKY adds a vf_trusted field to devices (sriov-trusted-vfs) +OS_ROCKY = '2018-08-27' + +  # keep this in chronological order. new supported versions go at the end.  OS_VERSIONS = (      OS_FOLSOM,      OS_GRIZZLY,      OS_HAVANA,      OS_LIBERTY, +    OS_NEWTON_ONE, +    OS_NEWTON_TWO, +    OS_OCATA, +    OS_ROCKY,  )  PHYSICAL_TYPES = (      None, +    'bgpovs',  # not present in OpenStack upstream but used on OVH cloud.      
'bridge',      'dvs',      'ethernet', @@ -68,10 +87,6 @@ class NonReadable(IOError):      pass -class BrokenMetadata(IOError): -    pass - -  class SourceMixin(object):      def _ec2_name_to_device(self, name):          if not self.ec2_metadata: @@ -441,7 +456,7 @@ class MetadataReader(BaseReader):              return self._versions          found = []          version_path = self._path_join(self.base_path, "openstack") -        content = self._path_read(version_path) +        content = self._path_read(version_path, decode=True)          for line in content.splitlines():              line = line.strip()              if not line: @@ -589,6 +604,8 @@ def convert_net_json(network_json=None, known_macs=None):              cfg.update({'type': 'physical', 'mac_address': link_mac_addr})          elif link['type'] in ['bond']:              params = {} +            if link_mac_addr: +                params['mac_address'] = link_mac_addr              for k, v in link.items():                  if k == 'bond_links':                      continue @@ -638,7 +655,7 @@ def convert_net_json(network_json=None, known_macs=None):              known_macs = net.get_interfaces_by_mac()          # go through and fill out the link_id_info with names -        for link_id, info in link_id_info.items(): +        for _link_id, info in link_id_info.items():              if info.get('name'):                  continue              if info.get('mac') in known_macs: @@ -658,6 +675,17 @@ def convert_net_json(network_json=None, known_macs=None):              else:                  cfg[key] = fmt % link_id_info[target]['name'] +    # Infiniband interfaces may be referenced in network_data.json by a 6 byte +    # Ethernet MAC-style address, and we use that address to look up the +    # interface name above. Now ensure that the hardware address is set to the +    # full 20 byte address. 
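The Infiniband fix-up described above is a post-pass over the already-named interfaces: any name present in the kernel's IB hardware-address table gets its 6-byte lookup address replaced by the full 20-byte address and is retyped. A sketch of that pass (the addresses are illustrative placeholders):

def apply_ib_hwaddrs(config, ib_hwaddrs_by_name):
    for cfg in config:
        hwaddr = ib_hwaddrs_by_name.get(cfg.get('name'))
        if hwaddr:
            cfg['mac_address'] = hwaddr   # full 20-byte IB address
            cfg['type'] = 'infiniband'
    return config

config = [{'name': 'ib0', 'type': 'physical',
           'mac_address': '00:11:22:33:44:56'}]
ib_table = {'ib0': 'a0:00:02:20:fe:80:00:00:00:00:00:00:'
                   '00:11:22:ff:fe:33:44:56'}
print(apply_ib_hwaddrs(config, ib_table))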
+    ib_known_hwaddrs = net.get_ib_hwaddrs_by_interface() +    if ib_known_hwaddrs: +        for cfg in config: +            if cfg['name'] in ib_known_hwaddrs: +                cfg['mac_address'] = ib_known_hwaddrs[cfg['name']] +                cfg['type'] = 'infiniband' +      for service in services:          cfg = service          cfg.update({'type': 'nameserver'}) diff --git a/cloudinit/sources/helpers/vmware/imc/config_nic.py b/cloudinit/sources/helpers/vmware/imc/config_nic.py index 2d8900e2..e1890e23 100644 --- a/cloudinit/sources/helpers/vmware/imc/config_nic.py +++ b/cloudinit/sources/helpers/vmware/imc/config_nic.py @@ -73,7 +73,7 @@ class NicConfigurator(object):          The mac address(es) are in the lower case          """          cmd = ['ip', 'addr', 'show'] -        (output, err) = util.subp(cmd) +        output, _err = util.subp(cmd)          sections = re.split(r'\n\d+: ', '\n' + output)[1:]          macPat = r'link/ether (([0-9A-Fa-f]{2}[:]){5}([0-9A-Fa-f]{2}))' @@ -164,7 +164,7 @@ class NicConfigurator(object):              return ([subnet], route_list)          # Add routes if there is no primary nic -        if not self._primaryNic: +        if not self._primaryNic and v4.gateways:              route_list.extend(self.gen_ipv4_route(nic,                                                    v4.gateways,                                                    v4.netmask)) diff --git a/cloudinit/sources/helpers/vmware/imc/config_passwd.py b/cloudinit/sources/helpers/vmware/imc/config_passwd.py index 75cfbaaf..8c91fa41 100644 --- a/cloudinit/sources/helpers/vmware/imc/config_passwd.py +++ b/cloudinit/sources/helpers/vmware/imc/config_passwd.py @@ -56,10 +56,10 @@ class PasswordConfigurator(object):          LOG.info('Expiring password.')          for user in uidUserList:              try: -                out, err = util.subp(['passwd', '--expire', user]) +                util.subp(['passwd', '--expire', user])              except util.ProcessExecutionError as e:                  if os.path.exists('/usr/bin/chage'): -                    out, e = util.subp(['chage', '-d', '0', user]) +                    util.subp(['chage', '-d', '0', user])                  else:                      LOG.warning('Failed to expire password for %s with error: '                                  '%s', user, e) diff --git a/cloudinit/sources/helpers/vmware/imc/guestcust_util.py b/cloudinit/sources/helpers/vmware/imc/guestcust_util.py index 44075255..a590f323 100644 --- a/cloudinit/sources/helpers/vmware/imc/guestcust_util.py +++ b/cloudinit/sources/helpers/vmware/imc/guestcust_util.py @@ -91,7 +91,7 @@ def enable_nics(nics):      for attempt in range(0, enableNicsWaitRetries):          logger.debug("Trying to connect interfaces, attempt %d", attempt) -        (out, err) = set_customization_status( +        (out, _err) = set_customization_status(              GuestCustStateEnum.GUESTCUST_STATE_RUNNING,              GuestCustEventEnum.GUESTCUST_EVENT_ENABLE_NICS,              nics) @@ -104,7 +104,7 @@ def enable_nics(nics):              return          for count in range(0, enableNicsWaitCount): -            (out, err) = set_customization_status( +            (out, _err) = set_customization_status(                  GuestCustStateEnum.GUESTCUST_STATE_RUNNING,                  GuestCustEventEnum.GUESTCUST_EVENT_QUERY_NICS,                  nics) diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index e7fda22a..8082019e 100644 --- 
a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -1,14 +1,17 @@  # This file is part of cloud-init. See LICENSE file for license information. +import copy  import inspect  import os  import six  import stat +from cloudinit.event import EventType  from cloudinit.helpers import Paths  from cloudinit import importer  from cloudinit.sources import ( -    INSTANCE_JSON_FILE, DataSource) +    EXPERIMENTAL_TEXT, INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, +    REDACT_SENSITIVE_VALUE, UNSET, DataSource, redact_sensitive_keys)  from cloudinit.tests.helpers import CiTestCase, skipIf, mock  from cloudinit.user_data import UserDataProcessor  from cloudinit import util @@ -17,25 +20,32 @@ from cloudinit import util  class DataSourceTestSubclassNet(DataSource):      dsname = 'MyTestSubclass' +    url_max_wait = 55 -    def __init__(self, sys_cfg, distro, paths, custom_userdata=None): +    def __init__(self, sys_cfg, distro, paths, custom_metadata=None, +                 custom_userdata=None, get_data_retval=True):          super(DataSourceTestSubclassNet, self).__init__(              sys_cfg, distro, paths)          self._custom_userdata = custom_userdata +        self._custom_metadata = custom_metadata +        self._get_data_retval = get_data_retval      def _get_cloud_name(self):          return 'SubclassCloudName'      def _get_data(self): -        self.metadata = {'availability_zone': 'myaz', -                         'local-hostname': 'test-subclass-hostname', -                         'region': 'myregion'} +        if self._custom_metadata: +            self.metadata = self._custom_metadata +        else: +            self.metadata = {'availability_zone': 'myaz', +                             'local-hostname': 'test-subclass-hostname', +                             'region': 'myregion'}          if self._custom_userdata:              self.userdata_raw = self._custom_userdata          else:              self.userdata_raw = 'userdata_raw'          self.vendordata_raw = 'vendordata_raw' -        return True +        return self._get_data_retval  class InvalidDataSourceTestSubclassNet(DataSource): @@ -70,8 +80,7 @@ class TestDataSource(CiTestCase):          """Init uses DataSource.dsname for sourcing ds_cfg."""          sys_cfg = {'datasource': {'MyTestSubclass': {'key2': False}}}          distro = 'distrotest'  # generally should be a Distro object -        paths = Paths({}) -        datasource = DataSourceTestSubclassNet(sys_cfg, distro, paths) +        datasource = DataSourceTestSubclassNet(sys_cfg, distro, self.paths)          self.assertEqual({'key2': False}, datasource.ds_cfg)      def test_str_is_classname(self): @@ -81,6 +90,91 @@ class TestDataSource(CiTestCase):              'DataSourceTestSubclassNet',              str(DataSourceTestSubclassNet('', '', self.paths))) +    def test_datasource_get_url_params_defaults(self): +        """get_url_params default url config settings for the datasource.""" +        params = self.datasource.get_url_params() +        self.assertEqual(params.max_wait_seconds, self.datasource.url_max_wait) +        self.assertEqual(params.timeout_seconds, self.datasource.url_timeout) +        self.assertEqual(params.num_retries, self.datasource.url_retries) + +    def test_datasource_get_url_params_subclassed(self): +        """Subclasses can override get_url_params defaults.""" +        sys_cfg = {'datasource': {'MyTestSubclass': {'key2': False}}} +        distro = 'distrotest'  # generally should be a Distro object +        
datasource = DataSourceTestSubclassNet(sys_cfg, distro, self.paths) +        expected = (datasource.url_max_wait, datasource.url_timeout, +                    datasource.url_retries) +        url_params = datasource.get_url_params() +        self.assertNotEqual(self.datasource.get_url_params(), url_params) +        self.assertEqual(expected, url_params) + +    def test_datasource_get_url_params_ds_config_override(self): +        """Datasource configuration options can override url param defaults.""" +        sys_cfg = { +            'datasource': { +                'MyTestSubclass': { +                    'max_wait': '1', 'timeout': '2', 'retries': '3'}}} +        datasource = DataSourceTestSubclassNet( +            sys_cfg, self.distro, self.paths) +        expected = (1, 2, 3) +        url_params = datasource.get_url_params() +        self.assertNotEqual( +            (datasource.url_max_wait, datasource.url_timeout, +             datasource.url_retries), +            url_params) +        self.assertEqual(expected, url_params) + +    def test_datasource_get_url_params_is_zero_or_greater(self): +        """get_url_params ignores timeouts with a value below 0.""" +        # Set an override that is below 0 which gets ignored. +        sys_cfg = {'datasource': {'_undef': {'timeout': '-1'}}} +        datasource = DataSource(sys_cfg, self.distro, self.paths) +        (_max_wait, timeout, _retries) = datasource.get_url_params() +        self.assertEqual(0, timeout) + +    def test_datasource_get_url_uses_defaults_on_errors(self): +        """On invalid system config values for url_params defaults are used.""" +        # All invalid values should be logged +        sys_cfg = {'datasource': { +            '_undef': { +                'max_wait': 'nope', 'timeout': 'bug', 'retries': 'nonint'}}} +        datasource = DataSource(sys_cfg, self.distro, self.paths) +        url_params = datasource.get_url_params() +        expected = (datasource.url_max_wait, datasource.url_timeout, +                    datasource.url_retries) +        self.assertEqual(expected, url_params) +        logs = self.logs.getvalue() +        expected_logs = [ +            "Config max_wait 'nope' is not an int, using default '-1'", +            "Config timeout 'bug' is not an int, using default '10'", +            "Config retries 'nonint' is not an int, using default '5'", +        ] +        for log in expected_logs: +            self.assertIn(log, logs) + +    @mock.patch('cloudinit.sources.net.find_fallback_nic') +    def test_fallback_interface_is_discovered(self, m_get_fallback_nic): +        """The fallback_interface is discovered via find_fallback_nic.""" +        m_get_fallback_nic.return_value = 'nic9' +        self.assertEqual('nic9', self.datasource.fallback_interface) + +    @mock.patch('cloudinit.sources.net.find_fallback_nic') +    def test_fallback_interface_logs_undiscovered(self, m_get_fallback_nic): +        """Log a warning when fallback_interface can not discover the nic.""" +        self.datasource._cloud_name = 'MySupahCloud' +        m_get_fallback_nic.return_value = None  # Couldn't discover nic +        self.assertIsNone(self.datasource.fallback_interface) +        self.assertEqual( +            'WARNING: Did not find a fallback interface on MySupahCloud.\n', +            self.logs.getvalue()) + +    @mock.patch('cloudinit.sources.net.find_fallback_nic') +    def test_wb_fallback_interface_is_cached(self, m_get_fallback_nic): +        """The fallback_interface is cached and won't be rediscovered.""" + 
       self.datasource._fallback_interface = 'nic10' +        self.assertEqual('nic10', self.datasource.fallback_interface) +        m_get_fallback_nic.assert_not_called() +      def test__get_data_unimplemented(self):          """Raise an error when _get_data is not implemented."""          with self.assertRaises(NotImplementedError) as context_manager: @@ -178,8 +272,19 @@ class TestDataSource(CiTestCase):                  self.assertEqual('fqdnhostname.domain.com',                                   datasource.get_hostname(fqdn=True)) -    def test_get_data_write_json_instance_data(self): -        """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" +    def test_get_data_does_not_write_instance_data_on_failure(self): +        """get_data does not write INSTANCE_JSON_FILE on get_data False.""" +        tmp = self.tmp_dir() +        datasource = DataSourceTestSubclassNet( +            self.sys_cfg, self.distro, Paths({'run_dir': tmp}), +            get_data_retval=False) +        self.assertFalse(datasource.get_data()) +        json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) +        self.assertFalse( +            os.path.exists(json_file), 'Found unexpected file %s' % json_file) + +    def test_get_data_writes_json_instance_data_on_success(self): +        """get_data writes INSTANCE_JSON_FILE to run_dir as world readable."""          tmp = self.tmp_dir()          datasource = DataSourceTestSubclassNet(              self.sys_cfg, self.distro, Paths({'run_dir': tmp})) @@ -187,40 +292,126 @@ class TestDataSource(CiTestCase):          json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)          content = util.load_file(json_file)          expected = { -            'base64-encoded-keys': [], +            'base64_encoded_keys': [], +            'sensitive_keys': [],              'v1': {                  'availability-zone': 'myaz', +                'availability_zone': 'myaz',                  'cloud-name': 'subclasscloudname', +                'cloud_name': 'subclasscloudname',                  'instance-id': 'iid-datasource', +                'instance_id': 'iid-datasource',                  'local-hostname': 'test-subclass-hostname', +                'local_hostname': 'test-subclass-hostname',                  'region': 'myregion'},              'ds': { -                'meta-data': {'availability_zone': 'myaz', +                '_doc': EXPERIMENTAL_TEXT, +                'meta_data': {'availability_zone': 'myaz',                                'local-hostname': 'test-subclass-hostname', -                              'region': 'myregion'}, -                'user-data': 'userdata_raw', -                'vendor-data': 'vendordata_raw'}} +                              'region': 'myregion'}}}          self.assertEqual(expected, util.load_json(content))          file_stat = os.stat(json_file) +        self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode)) +        self.assertEqual(expected, util.load_json(content)) + +    def test_get_data_writes_json_instance_data_sensitive(self): +        """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root.""" +        tmp = self.tmp_dir() +        datasource = DataSourceTestSubclassNet( +            self.sys_cfg, self.distro, Paths({'run_dir': tmp}), +            custom_metadata={ +                'availability_zone': 'myaz', +                'local-hostname': 'test-subclass-hostname', +                'region': 'myregion', +                'some': {'security-credentials': { +                    'cred1': 'sekret', 'cred2': 
'othersekret'}}}) +        self.assertEqual( +            ('security-credentials',), datasource.sensitive_metadata_keys) +        datasource.get_data() +        json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) +        sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp) +        redacted = util.load_json(util.load_file(json_file)) +        self.assertEqual( +            {'cred1': 'sekret', 'cred2': 'othersekret'}, +            redacted['ds']['meta_data']['some']['security-credentials']) +        content = util.load_file(sensitive_json_file) +        expected = { +            'base64_encoded_keys': [], +            'sensitive_keys': ['ds/meta_data/some/security-credentials'], +            'v1': { +                'availability-zone': 'myaz', +                'availability_zone': 'myaz', +                'cloud-name': 'subclasscloudname', +                'cloud_name': 'subclasscloudname', +                'instance-id': 'iid-datasource', +                'instance_id': 'iid-datasource', +                'local-hostname': 'test-subclass-hostname', +                'local_hostname': 'test-subclass-hostname', +                'region': 'myregion'}, +            'ds': { +                '_doc': EXPERIMENTAL_TEXT, +                'meta_data': { +                    'availability_zone': 'myaz', +                    'local-hostname': 'test-subclass-hostname', +                    'region': 'myregion', +                    'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}} +        } +        self.maxDiff = None +        self.assertEqual(expected, util.load_json(content)) +        file_stat = os.stat(sensitive_json_file)          self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode)) +        self.assertEqual(expected, util.load_json(content))      def test_get_data_handles_redacted_unserializable_content(self):          """get_data warns unserializable content in INSTANCE_JSON_FILE."""          tmp = self.tmp_dir()          datasource = DataSourceTestSubclassNet(              self.sys_cfg, self.distro, Paths({'run_dir': tmp}), -            custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) -        self.assertTrue(datasource.get_data()) +            custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) +        datasource.get_data()          json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)          content = util.load_file(json_file) -        expected_userdata = { +        expected_metadata = {              'key1': 'val1',              'key2': {                  'key2.1': "Warning: redacted unserializable type <class"                            " 'cloudinit.helpers.Paths'>"}}          instance_json = util.load_json(content)          self.assertEqual( -            expected_userdata, instance_json['ds']['user-data']) +            expected_metadata, instance_json['ds']['meta_data']) + +    def test_persist_instance_data_writes_ec2_metadata_when_set(self): +        """When ec2_metadata class attribute is set, persist to json.""" +        tmp = self.tmp_dir() +        datasource = DataSourceTestSubclassNet( +            self.sys_cfg, self.distro, Paths({'run_dir': tmp})) +        datasource.ec2_metadata = UNSET +        datasource.get_data() +        json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) +        instance_data = util.load_json(util.load_file(json_file)) +        self.assertNotIn('ec2_metadata', instance_data['ds']) +        datasource.ec2_metadata = {'ec2stuff': 'is good'} +        datasource.persist_instance_data() +        
instance_data = util.load_json(util.load_file(json_file)) +        self.assertEqual( +            {'ec2stuff': 'is good'}, +            instance_data['ds']['ec2_metadata']) + +    def test_persist_instance_data_writes_network_json_when_set(self): +        """When network_data.json class attribute is set, persist to json.""" +        tmp = self.tmp_dir() +        datasource = DataSourceTestSubclassNet( +            self.sys_cfg, self.distro, Paths({'run_dir': tmp})) +        datasource.get_data() +        json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) +        instance_data = util.load_json(util.load_file(json_file)) +        self.assertNotIn('network_json', instance_data['ds']) +        datasource.network_json = {'network_json': 'is good'} +        datasource.persist_instance_data() +        instance_data = util.load_json(util.load_file(json_file)) +        self.assertEqual( +            {'network_json': 'is good'}, +            instance_data['ds']['network_json'])      @skipIf(not six.PY3, "json serialization on <= py2.7 handles bytes")      def test_get_data_base64encodes_unserializable_bytes(self): @@ -228,17 +419,17 @@ class TestDataSource(CiTestCase):          tmp = self.tmp_dir()          datasource = DataSourceTestSubclassNet(              self.sys_cfg, self.distro, Paths({'run_dir': tmp}), -            custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) +            custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})          self.assertTrue(datasource.get_data())          json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)          content = util.load_file(json_file)          instance_json = util.load_json(content) -        self.assertEqual( -            ['ds/user-data/key2/key2.1'], -            instance_json['base64-encoded-keys']) +        self.assertItemsEqual( +            ['ds/meta_data/key2/key2.1'], +            instance_json['base64_encoded_keys'])          self.assertEqual(              {'key1': 'val1', 'key2': {'key2.1': 'EjM='}}, -            instance_json['ds']['user-data']) +            instance_json['ds']['meta_data'])      @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes")      def test_get_data_handles_bytes_values(self): @@ -246,15 +437,15 @@ class TestDataSource(CiTestCase):          tmp = self.tmp_dir()          datasource = DataSourceTestSubclassNet(              self.sys_cfg, self.distro, Paths({'run_dir': tmp}), -            custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) +            custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})          self.assertTrue(datasource.get_data())          json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)          content = util.load_file(json_file)          instance_json = util.load_json(content) -        self.assertEqual([], instance_json['base64-encoded-keys']) +        self.assertEqual([], instance_json['base64_encoded_keys'])          self.assertEqual(              {'key1': 'val1', 'key2': {'key2.1': '\x123'}}, -            instance_json['ds']['user-data']) +            instance_json['ds']['meta_data'])      @skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8")      def test_non_utf8_encoding_logs_warning(self): @@ -262,7 +453,7 @@ class TestDataSource(CiTestCase):          tmp = self.tmp_dir()          datasource = DataSourceTestSubclassNet(              self.sys_cfg, self.distro, Paths({'run_dir': tmp}), -            custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) +            custom_metadata={'key1': 
'val1', 'key2': {'key2.1': b'ab\xaadef'}})          self.assertTrue(datasource.get_data())          json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)          self.assertFalse(os.path.exists(json_file)) @@ -278,7 +469,7 @@ class TestDataSource(CiTestCase):          base_args = get_args(DataSource.get_hostname)  # pylint: disable=W1505          # Import all DataSource subclasses so we can inspect them.          modules = util.find_modules(os.path.dirname(os.path.dirname(__file__))) -        for loc, name in modules.items(): +        for _loc, name in modules.items():              mod_locs, _ = importer.find_module(name, ['cloudinit.sources'], [])              if mod_locs:                  importer.import_module(mod_locs[0]) @@ -296,3 +487,116 @@ class TestDataSource(CiTestCase):                      get_args(grandchild.get_hostname),  # pylint: disable=W1505                      '%s does not implement DataSource.get_hostname params'                      % grandchild) + +    def test_clear_cached_attrs_resets_cached_attr_class_attributes(self): +        """Class attributes listed in cached_attr_defaults are reset.""" +        count = 0 +        # Setup values for all cached class attributes +        for attr, value in self.datasource.cached_attr_defaults: +            setattr(self.datasource, attr, count) +            count += 1 +        self.datasource._dirty_cache = True +        self.datasource.clear_cached_attrs() +        for attr, value in self.datasource.cached_attr_defaults: +            self.assertEqual(value, getattr(self.datasource, attr)) + +    def test_clear_cached_attrs_noops_on_clean_cache(self): +        """Class attributes listed in cached_attr_defaults are reset.""" +        count = 0 +        # Setup values for all cached class attributes +        for attr, _ in self.datasource.cached_attr_defaults: +            setattr(self.datasource, attr, count) +            count += 1 +        self.datasource._dirty_cache = False   # Fake clean cache +        self.datasource.clear_cached_attrs() +        count = 0 +        for attr, _ in self.datasource.cached_attr_defaults: +            self.assertEqual(count, getattr(self.datasource, attr)) +            count += 1 + +    def test_clear_cached_attrs_skips_non_attr_class_attributes(self): +        """Skip any cached_attr_defaults which aren't class attributes.""" +        self.datasource._dirty_cache = True +        self.datasource.clear_cached_attrs() +        for attr in ('ec2_metadata', 'network_json'): +            self.assertFalse(hasattr(self.datasource, attr)) + +    def test_clear_cached_attrs_of_custom_attrs(self): +        """Custom attr_values can be passed to clear_cached_attrs.""" +        self.datasource._dirty_cache = True +        cached_attr_name = self.datasource.cached_attr_defaults[0][0] +        setattr(self.datasource, cached_attr_name, 'himom') +        self.datasource.myattr = 'orig' +        self.datasource.clear_cached_attrs( +            attr_defaults=(('myattr', 'updated'),)) +        self.assertEqual('himom', getattr(self.datasource, cached_attr_name)) +        self.assertEqual('updated', self.datasource.myattr) + +    def test_update_metadata_only_acts_on_supported_update_events(self): +        """update_metadata won't get_data on unsupported update events.""" +        self.datasource.update_events['network'].discard(EventType.BOOT) +        self.assertEqual( +            {'network': set([EventType.BOOT_NEW_INSTANCE])}, +            self.datasource.update_events) + +        def fake_get_data(): +        
    raise Exception('get_data should not be called') + +        self.datasource.get_data = fake_get_data +        self.assertFalse( +            self.datasource.update_metadata( +                source_event_types=[EventType.BOOT])) + +    def test_update_metadata_returns_true_on_supported_update_event(self): +        """update_metadata returns get_data response on supported events.""" + +        def fake_get_data(): +            return True + +        self.datasource.get_data = fake_get_data +        self.datasource._network_config = 'something' +        self.datasource._dirty_cache = True +        self.assertTrue( +            self.datasource.update_metadata( +                source_event_types=[ +                    EventType.BOOT, EventType.BOOT_NEW_INSTANCE])) +        self.assertEqual(UNSET, self.datasource._network_config) +        self.assertIn( +            "DEBUG: Update datasource metadata and network config due to" +            " events: New instance first boot", +            self.logs.getvalue()) + + +class TestRedactSensitiveData(CiTestCase): + +    def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self): +        """When sensitive_keys is absent or empty from metadata do nothing.""" +        md = {'my': 'data'} +        self.assertEqual( +            md, redact_sensitive_keys(md, redact_value='redacted')) +        md['sensitive_keys'] = [] +        self.assertEqual( +            md, redact_sensitive_keys(md, redact_value='redacted')) + +    def test_redact_sensitive_data_redacts_exact_match_name(self): +        """Only exact matched sensitive_keys are redacted from metadata.""" +        md = {'sensitive_keys': ['md/secure'], +              'md': {'secure': 's3kr1t', 'insecure': 'publik'}} +        secure_md = copy.deepcopy(md) +        secure_md['md']['secure'] = 'redacted' +        self.assertEqual( +            secure_md, +            redact_sensitive_keys(md, redact_value='redacted')) + +    def test_redact_sensitive_data_does_redacts_with_default_string(self): +        """When redact_value is absent, REDACT_SENSITIVE_VALUE is used.""" +        md = {'sensitive_keys': ['md/secure'], +              'md': {'secure': 's3kr1t', 'insecure': 'publik'}} +        secure_md = copy.deepcopy(md) +        secure_md['md']['secure'] = 'redacted for non-root user' +        self.assertEqual( +            secure_md, +            redact_sensitive_keys(md)) + + +# vi: ts=4 expandtab diff --git a/cloudinit/sources/tests/test_oracle.py b/cloudinit/sources/tests/test_oracle.py new file mode 100644 index 00000000..7599126c --- /dev/null +++ b/cloudinit/sources/tests/test_oracle.py @@ -0,0 +1,331 @@ +# This file is part of cloud-init. See LICENSE file for license information. 
+ +from cloudinit.sources import DataSourceOracle as oracle +from cloudinit.sources import BrokenMetadata +from cloudinit import helpers + +from cloudinit.tests import helpers as test_helpers + +from textwrap import dedent +import argparse +import httpretty +import json +import mock +import os +import six +import uuid + +DS_PATH = "cloudinit.sources.DataSourceOracle" +MD_VER = "2013-10-17" + + +class TestDataSourceOracle(test_helpers.CiTestCase): +    """Test datasource DataSourceOracle.""" + +    ds_class = oracle.DataSourceOracle + +    my_uuid = str(uuid.uuid4()) +    my_md = {"uuid": "ocid1.instance.oc1.phx.abyhqlj", +             "name": "ci-vm1", "availability_zone": "phx-ad-3", +             "hostname": "ci-vm1hostname", +             "launch_index": 0, "files": [], +             "public_keys": {"0": "ssh-rsa AAAAB3N...== user@host"}, +             "meta": {}} + +    def _patch_instance(self, inst, patches): +        """Patch an instance of a class 'inst'. +        for each name, kwargs in patches: +             inst.name = mock.Mock(**kwargs) +        returns a namespace object that has +             namespace.name = mock.Mock(**kwargs) +        Do not bother with cleanup as instance is assumed transient.""" +        mocks = argparse.Namespace() +        for name, kwargs in patches.items(): +            imock = mock.Mock(name=name, spec=getattr(inst, name), **kwargs) +            setattr(mocks, name, imock) +            setattr(inst, name, imock) +        return mocks + +    def _get_ds(self, sys_cfg=None, distro=None, paths=None, ud_proc=None, +                patches=None): +        if sys_cfg is None: +            sys_cfg = {} +        if patches is None: +            patches = {} +        if paths is None: +            tmpd = self.tmp_dir() +            dirs = {'cloud_dir': self.tmp_path('cloud_dir', tmpd), +                    'run_dir': self.tmp_path('run_dir')} +            for d in dirs.values(): +                os.mkdir(d) +            paths = helpers.Paths(dirs) + +        ds = self.ds_class(sys_cfg=sys_cfg, distro=distro, +                           paths=paths, ud_proc=ud_proc) + +        return ds, self._patch_instance(ds, patches) + +    def test_platform_not_viable_returns_false(self): +        ds, mocks = self._get_ds( +            patches={'_is_platform_viable': {'return_value': False}}) +        self.assertFalse(ds._get_data()) +        mocks._is_platform_viable.assert_called_once_with() + +    @mock.patch(DS_PATH + "._is_iscsi_root", return_value=True) +    def test_without_userdata(self, m_is_iscsi_root): +        """If no user-data is provided, it should not be in return dict.""" +        ds, mocks = self._get_ds(patches={ +            '_is_platform_viable': {'return_value': True}, +            'crawl_metadata': { +                'return_value': { +                    MD_VER: {'system_uuid': self.my_uuid, +                             'meta_data': self.my_md}}}}) +        self.assertTrue(ds._get_data()) +        mocks._is_platform_viable.assert_called_once_with() +        mocks.crawl_metadata.assert_called_once_with() +        self.assertEqual(self.my_uuid, ds.system_uuid) +        self.assertEqual(self.my_md['availability_zone'], ds.availability_zone) +        self.assertIn(self.my_md["public_keys"]["0"], ds.get_public_ssh_keys()) +        self.assertEqual(self.my_md['uuid'], ds.get_instance_id()) +        self.assertIsNone(ds.userdata_raw) + +    @mock.patch(DS_PATH + "._is_iscsi_root", return_value=True) +    def test_with_vendordata(self, 
m_is_iscsi_root): +        """Test with vendor data.""" +        vd = {'cloud-init': '#cloud-config\nkey: value'} +        ds, mocks = self._get_ds(patches={ +            '_is_platform_viable': {'return_value': True}, +            'crawl_metadata': { +                'return_value': { +                    MD_VER: {'system_uuid': self.my_uuid, +                             'meta_data': self.my_md, +                             'vendor_data': vd}}}}) +        self.assertTrue(ds._get_data()) +        mocks._is_platform_viable.assert_called_once_with() +        mocks.crawl_metadata.assert_called_once_with() +        self.assertEqual(vd, ds.vendordata_pure) +        self.assertEqual(vd['cloud-init'], ds.vendordata_raw) + +    @mock.patch(DS_PATH + "._is_iscsi_root", return_value=True) +    def test_with_userdata(self, m_is_iscsi_root): +        """Ensure user-data is populated if present and is binary.""" +        my_userdata = b'abcdefg' +        ds, mocks = self._get_ds(patches={ +            '_is_platform_viable': {'return_value': True}, +            'crawl_metadata': { +                'return_value': { +                    MD_VER: {'system_uuid': self.my_uuid, +                             'meta_data': self.my_md, +                             'user_data': my_userdata}}}}) +        self.assertTrue(ds._get_data()) +        mocks._is_platform_viable.assert_called_once_with() +        mocks.crawl_metadata.assert_called_once_with() +        self.assertEqual(self.my_uuid, ds.system_uuid) +        self.assertIn(self.my_md["public_keys"]["0"], ds.get_public_ssh_keys()) +        self.assertEqual(self.my_md['uuid'], ds.get_instance_id()) +        self.assertEqual(my_userdata, ds.userdata_raw) + +    @mock.patch(DS_PATH + ".cmdline.read_kernel_cmdline_config") +    @mock.patch(DS_PATH + "._is_iscsi_root", return_value=True) +    def test_network_cmdline(self, m_is_iscsi_root, m_cmdline_config): +        """network_config should read kernel cmdline.""" +        distro = mock.MagicMock() +        ds, _ = self._get_ds(distro=distro, patches={ +            '_is_platform_viable': {'return_value': True}, +            'crawl_metadata': { +                'return_value': { +                    MD_VER: {'system_uuid': self.my_uuid, +                             'meta_data': self.my_md}}}}) +        ncfg = {'version': 1, 'config': [{'a': 'b'}]} +        m_cmdline_config.return_value = ncfg +        self.assertTrue(ds._get_data()) +        self.assertEqual(ncfg, ds.network_config) +        m_cmdline_config.assert_called_once_with() +        self.assertFalse(distro.generate_fallback_config.called) + +    @mock.patch(DS_PATH + ".cmdline.read_kernel_cmdline_config") +    @mock.patch(DS_PATH + "._is_iscsi_root", return_value=True) +    def test_network_fallback(self, m_is_iscsi_root, m_cmdline_config): +        """test that fallback network is generated if no kernel cmdline.""" +        distro = mock.MagicMock() +        ds, _ = self._get_ds(distro=distro, patches={ +            '_is_platform_viable': {'return_value': True}, +            'crawl_metadata': { +                'return_value': { +                    MD_VER: {'system_uuid': self.my_uuid, +                             'meta_data': self.my_md}}}}) +        ncfg = {'version': 1, 'config': [{'a': 'b'}]} +        m_cmdline_config.return_value = None +        self.assertTrue(ds._get_data()) +        ncfg = {'version': 1, 'config': [{'distro1': 'value'}]} +        distro.generate_fallback_config.return_value = ncfg +        self.assertEqual(ncfg, 
ds.network_config) +        m_cmdline_config.assert_called_once_with() +        distro.generate_fallback_config.assert_called_once_with() +        self.assertEqual(1, m_cmdline_config.call_count) + +        # test that the result got cached, and the methods not re-called. +        self.assertEqual(ncfg, ds.network_config) +        self.assertEqual(1, m_cmdline_config.call_count) + + +@mock.patch(DS_PATH + "._read_system_uuid", return_value=str(uuid.uuid4())) +class TestReadMetaData(test_helpers.HttprettyTestCase): +    """Test the read_metadata which interacts with http metadata service.""" + +    mdurl = oracle.METADATA_ENDPOINT +    my_md = {"uuid": "ocid1.instance.oc1.phx.abyhqlj", +             "name": "ci-vm1", "availability_zone": "phx-ad-3", +             "hostname": "ci-vm1hostname", +             "launch_index": 0, "files": [], +             "public_keys": {"0": "ssh-rsa AAAAB3N...== user@host"}, +             "meta": {}} + +    def populate_md(self, data): +        """Call httpretty.register_uri for each item in dict 'data', +           including valid indexes. Text values converted to bytes.""" +        httpretty.register_uri( +            httpretty.GET, self.mdurl + MD_VER + "/", +            '\n'.join(data.keys()).encode('utf-8')) +        for k, v in data.items(): +            httpretty.register_uri( +                httpretty.GET, self.mdurl + MD_VER + "/" + k, +                v if not isinstance(v, six.text_type) else v.encode('utf-8')) + +    def test_broken_no_sys_uuid(self, m_read_system_uuid): +        """Datasource requires ability to read system_uuid and true return.""" +        m_read_system_uuid.return_value = None +        self.assertRaises(BrokenMetadata, oracle.read_metadata) + +    def test_broken_no_metadata_json(self, m_read_system_uuid): +        """Datasource requires meta_data.json.""" +        httpretty.register_uri( +            httpretty.GET, self.mdurl + MD_VER + "/", +            '\n'.join(['user_data']).encode('utf-8')) +        with self.assertRaises(BrokenMetadata) as cm: +            oracle.read_metadata() +        self.assertIn("Required field 'meta_data.json' missing", +                      str(cm.exception)) + +    def test_with_userdata(self, m_read_system_uuid): +        data = {'user_data': b'#!/bin/sh\necho hi world\n', +                'meta_data.json': json.dumps(self.my_md)} +        self.populate_md(data) +        result = oracle.read_metadata()[MD_VER] +        self.assertEqual(data['user_data'], result['user_data']) +        self.assertEqual(self.my_md, result['meta_data']) + +    def test_without_userdata(self, m_read_system_uuid): +        data = {'meta_data.json': json.dumps(self.my_md)} +        self.populate_md(data) +        result = oracle.read_metadata()[MD_VER] +        self.assertNotIn('user_data', result) +        self.assertEqual(self.my_md, result['meta_data']) + +    def test_unknown_fields_included(self, m_read_system_uuid): +        """Unknown fields listed in index should be included. 
+        And those ending in .json should be decoded.""" +        some_data = {'key1': 'data1', 'subk1': {'subd1': 'subv'}} +        some_vendor_data = {'cloud-init': 'foo'} +        data = {'meta_data.json': json.dumps(self.my_md), +                'some_data.json': json.dumps(some_data), +                'vendor_data.json': json.dumps(some_vendor_data), +                'other_blob': b'this is blob'} +        self.populate_md(data) +        result = oracle.read_metadata()[MD_VER] +        self.assertNotIn('user_data', result) +        self.assertEqual(self.my_md, result['meta_data']) +        self.assertEqual(some_data, result['some_data']) +        self.assertEqual(some_vendor_data, result['vendor_data']) +        self.assertEqual(data['other_blob'], result['other_blob']) + + +class TestIsPlatformViable(test_helpers.CiTestCase): +    @mock.patch(DS_PATH + ".util.read_dmi_data", +                return_value=oracle.CHASSIS_ASSET_TAG) +    def test_expected_viable(self, m_read_dmi_data): +        """System with known chassis tag is viable.""" +        self.assertTrue(oracle._is_platform_viable()) +        m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')]) + +    @mock.patch(DS_PATH + ".util.read_dmi_data", return_value=None) +    def test_expected_not_viable_dmi_data_none(self, m_read_dmi_data): +        """System without known chassis tag is not viable.""" +        self.assertFalse(oracle._is_platform_viable()) +        m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')]) + +    @mock.patch(DS_PATH + ".util.read_dmi_data", return_value="LetsGoCubs") +    def test_expected_not_viable_other(self, m_read_dmi_data): +        """System with unknown chassis tag is not viable.""" +        self.assertFalse(oracle._is_platform_viable()) +        m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')]) + + +class TestLoadIndex(test_helpers.CiTestCase): +    """_load_index handles parsing of an index into a proper list. +    The tests here guarantee correct parsing of html version or +    a fixed version.  
See the function docstring for more doc.""" + +    _known_html_api_versions = dedent("""\ +        <html> +        <head><title>Index of /openstack/</title></head> +        <body bgcolor="white"> +        <h1>Index of /openstack/</h1><hr><pre><a href="../">../</a> +        <a href="2013-10-17/">2013-10-17/</a>   27-Jun-2018 12:22  - +        <a href="latest/">latest/</a>           27-Jun-2018 12:22  - +        </pre><hr></body> +        </html>""") + +    _known_html_contents = dedent("""\ +        <html> +        <head><title>Index of /openstack/2013-10-17/</title></head> +        <body bgcolor="white"> +        <h1>Index of /openstack/2013-10-17/</h1><hr><pre><a href="../">../</a> +        <a href="meta_data.json">meta_data.json</a>  27-Jun-2018 12:22  679 +        <a href="user_data">user_data</a>            27-Jun-2018 12:22  146 +        </pre><hr></body> +        </html>""") + +    def test_parse_html(self): +        """Test parsing of lower case html.""" +        self.assertEqual( +            ['2013-10-17/', 'latest/'], +            oracle._load_index(self._known_html_api_versions)) +        self.assertEqual( +            ['meta_data.json', 'user_data'], +            oracle._load_index(self._known_html_contents)) + +    def test_parse_html_upper(self): +        """Test parsing of upper case html, although known content is lower.""" +        def _toupper(data): +            return data.replace("<a", "<A").replace("html>", "HTML>") + +        self.assertEqual( +            ['2013-10-17/', 'latest/'], +            oracle._load_index(_toupper(self._known_html_api_versions))) +        self.assertEqual( +            ['meta_data.json', 'user_data'], +            oracle._load_index(_toupper(self._known_html_contents))) + +    def test_parse_newline_list_with_endl(self): +        """Test parsing of newline separated list with ending newline.""" +        self.assertEqual( +            ['2013-10-17/', 'latest/'], +            oracle._load_index("\n".join(["2013-10-17/", "latest/", ""]))) +        self.assertEqual( +            ['meta_data.json', 'user_data'], +            oracle._load_index("\n".join(["meta_data.json", "user_data", ""]))) + +    def test_parse_newline_list_without_endl(self): +        """Test parsing of newline separated list with no ending newline. + +        Actual openstack implementation does not include trailing newline.""" +        self.assertEqual( +            ['2013-10-17/', 'latest/'], +            oracle._load_index("\n".join(["2013-10-17/", "latest/"]))) +        self.assertEqual( +            ['meta_data.json', 'user_data'], +            oracle._load_index("\n".join(["meta_data.json", "user_data"]))) + + +# vi: ts=4 expandtab | 
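The TestLoadIndex cases above pin down the two index formats the Oracle datasource has to cope with: a plain newline-separated listing and an nginx-style HTML directory index, in upper or lower case, with '../' and blank entries excluded from the result. The following is a minimal sketch of that parsing behaviour written only against what the tests assert; the name load_index_sketch is hypothetical and the real helper is DataSourceOracle._load_index, which may be implemented differently.

    import re

    def load_index_sketch(text):
        """Parse an OpenStack-style metadata index into a list of entries.

        Handles both a plain newline-separated listing and an HTML
        directory index, matching what TestLoadIndex asserts.
        Illustrative only.
        """
        if '<html>' in text.lower():
            # Pull link targets out of anchor tags, dropping the parent link.
            names = re.findall(r'<a\s+href="([^"]+)"', text, re.IGNORECASE)
            return [name for name in names if name != '../']
        # Plain listing: one entry per line; a trailing newline adds nothing.
        return [line for line in text.splitlines() if line]

    # Expected results, per the tests:
    #   load_index_sketch("2013-10-17/\nlatest/\n") == ['2013-10-17/', 'latest/']
    #   load_index_sketch(html_contents_index) == ['meta_data.json', 'user_data']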
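Similarly, TestRedactSensitiveData (earlier in test_init.py) relies on the 'sensitive_keys' entries being '/'-separated paths into the metadata dict and on the default redaction string being 'redacted for non-root user'. A rough sketch of that behaviour, assuming only what those tests assert; redact_nested_sketch is a hypothetical stand-in for cloudinit.sources.redact_sensitive_keys.

    import copy

    # Default value the tests expect when redact_value is not passed.
    REDACTED_DEFAULT = 'redacted for non-root user'

    def redact_nested_sketch(metadata, redact_value=REDACTED_DEFAULT):
        """Return a copy of metadata with each sensitive_keys path redacted.

        Paths such as 'md/secure' name nested keys; when sensitive_keys is
        missing or empty the input is returned unchanged, as the tests expect.
        """
        if not metadata.get('sensitive_keys'):
            return metadata
        redacted = copy.deepcopy(metadata)
        for key_path in metadata['sensitive_keys']:
            parts = key_path.split('/')
            node = redacted
            for part in parts[:-1]:
                node = node[part]  # walk down to the parent of the sensitive key
            node[parts[-1]] = redact_value
        return redacted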
